author     Christoph Hellwig <hch@lst.de>       2019-08-28 10:19:54 -0400
committer  Jason Gunthorpe <jgg@mellanox.com>   2019-09-07 03:28:04 -0400
commit     7b86ac3371b70c3fd8fd95501719beb1faab719f (patch)
tree       b7f61e4615d249563f09567a22ee399634c898dd
parent     a520110e4a15ceb385304d9cab22bb51438f6080 (diff)
pagewalk: separate function pointers from iterator data
The mm_walk structure currently mixes data and code. Split out the
operations vectors into a new mm_walk_ops structure, and while we are
changing the API also declare the mm_walk structure inside the
walk_page_range and walk_page_vma functions.
Based on patch from Linus Torvalds.
Link: https://lore.kernel.org/r/20190828141955.22210-3-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
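To make the conversion concrete, here is a minimal sketch of a caller before and after this change (illustrative only, not part of the patch; the names example_pmd_entry, example_old, example_new and data are invented, and the two halves target the old and new API respectively, so they would not build in the same tree):

#include <linux/pagewalk.h>

/* Hypothetical per-PMD callback; callback signatures are unchanged by this patch. */
static int example_pmd_entry(pmd_t *pmd, unsigned long addr,
			     unsigned long next, struct mm_walk *walk)
{
	return 0;
}

/* Old interface: one on-stack struct mm_walk mixed callbacks with mm and data. */
static int example_old(struct mm_struct *mm, unsigned long start,
		       unsigned long end, void *data)
{
	struct mm_walk walk = {
		.pmd_entry	= example_pmd_entry,
		.mm		= mm,
		.private	= data,
	};

	return walk_page_range(start, end, &walk);
}

/*
 * New interface: callbacks live in a shared const mm_walk_ops, while mm and
 * the private pointer are passed to walk_page_range() directly.
 */
static const struct mm_walk_ops example_walk_ops = {
	.pmd_entry	= example_pmd_entry,
};

static int example_new(struct mm_struct *mm, unsigned long start,
		       unsigned long end, void *data)
{
	return walk_page_range(mm, start, end, &example_walk_ops, data);
}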
-rw-r--r--  arch/openrisc/kernel/dma.c               |  22
-rw-r--r--  arch/powerpc/mm/book3s64/subpage_prot.c  |  10
-rw-r--r--  arch/s390/mm/gmap.c                      |  33
-rw-r--r--  fs/proc/task_mmu.c                       |  78
-rw-r--r--  include/linux/pagewalk.h                 |  64
-rw-r--r--  mm/hmm.c                                 |  23
-rw-r--r--  mm/madvise.c                             |  41
-rw-r--r--  mm/memcontrol.c                          |  23
-rw-r--r--  mm/mempolicy.c                           |  15
-rw-r--r--  mm/migrate.c                             |  23
-rw-r--r--  mm/mincore.c                             |  15
-rw-r--r--  mm/mprotect.c                            |  24
-rw-r--r--  mm/pagewalk.c                            | 124
13 files changed, 251 insertions, 244 deletions
diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c
index c7812e6effa2..4d5b8bd1d795 100644
--- a/arch/openrisc/kernel/dma.c
+++ b/arch/openrisc/kernel/dma.c
@@ -44,6 +44,10 @@ page_set_nocache(pte_t *pte, unsigned long addr, | |||
44 | return 0; | 44 | return 0; |
45 | } | 45 | } |
46 | 46 | ||
47 | static const struct mm_walk_ops set_nocache_walk_ops = { | ||
48 | .pte_entry = page_set_nocache, | ||
49 | }; | ||
50 | |||
47 | static int | 51 | static int |
48 | page_clear_nocache(pte_t *pte, unsigned long addr, | 52 | page_clear_nocache(pte_t *pte, unsigned long addr, |
49 | unsigned long next, struct mm_walk *walk) | 53 | unsigned long next, struct mm_walk *walk) |
@@ -59,6 +63,10 @@ page_clear_nocache(pte_t *pte, unsigned long addr, | |||
59 | return 0; | 63 | return 0; |
60 | } | 64 | } |
61 | 65 | ||
66 | static const struct mm_walk_ops clear_nocache_walk_ops = { | ||
67 | .pte_entry = page_clear_nocache, | ||
68 | }; | ||
69 | |||
62 | /* | 70 | /* |
63 | * Alloc "coherent" memory, which for OpenRISC means simply uncached. | 71 | * Alloc "coherent" memory, which for OpenRISC means simply uncached. |
64 | * | 72 | * |
@@ -81,10 +89,6 @@ arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, | |||
81 | { | 89 | { |
82 | unsigned long va; | 90 | unsigned long va; |
83 | void *page; | 91 | void *page; |
84 | struct mm_walk walk = { | ||
85 | .pte_entry = page_set_nocache, | ||
86 | .mm = &init_mm | ||
87 | }; | ||
88 | 92 | ||
89 | page = alloc_pages_exact(size, gfp | __GFP_ZERO); | 93 | page = alloc_pages_exact(size, gfp | __GFP_ZERO); |
90 | if (!page) | 94 | if (!page) |
@@ -99,7 +103,8 @@ arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, | |||
99 | * We need to iterate through the pages, clearing the dcache for | 103 | * We need to iterate through the pages, clearing the dcache for |
100 | * them and setting the cache-inhibit bit. | 104 | * them and setting the cache-inhibit bit. |
101 | */ | 105 | */ |
102 | if (walk_page_range(va, va + size, &walk)) { | 106 | if (walk_page_range(&init_mm, va, va + size, &set_nocache_walk_ops, |
107 | NULL)) { | ||
103 | free_pages_exact(page, size); | 108 | free_pages_exact(page, size); |
104 | return NULL; | 109 | return NULL; |
105 | } | 110 | } |
@@ -112,13 +117,10 @@ arch_dma_free(struct device *dev, size_t size, void *vaddr, | |||
112 | dma_addr_t dma_handle, unsigned long attrs) | 117 | dma_addr_t dma_handle, unsigned long attrs) |
113 | { | 118 | { |
114 | unsigned long va = (unsigned long)vaddr; | 119 | unsigned long va = (unsigned long)vaddr; |
115 | struct mm_walk walk = { | ||
116 | .pte_entry = page_clear_nocache, | ||
117 | .mm = &init_mm | ||
118 | }; | ||
119 | 120 | ||
120 | /* walk_page_range shouldn't be able to fail here */ | 121 | /* walk_page_range shouldn't be able to fail here */ |
121 | WARN_ON(walk_page_range(va, va + size, &walk)); | 122 | WARN_ON(walk_page_range(&init_mm, va, va + size, |
123 | &clear_nocache_walk_ops, NULL)); | ||
122 | 124 | ||
123 | free_pages_exact(vaddr, size); | 125 | free_pages_exact(vaddr, size); |
124 | } | 126 | } |
diff --git a/arch/powerpc/mm/book3s64/subpage_prot.c b/arch/powerpc/mm/book3s64/subpage_prot.c
index 236f0a861ecc..2ef24a53f4c9 100644
--- a/arch/powerpc/mm/book3s64/subpage_prot.c
+++ b/arch/powerpc/mm/book3s64/subpage_prot.c
@@ -139,14 +139,14 @@ static int subpage_walk_pmd_entry(pmd_t *pmd, unsigned long addr, | |||
139 | return 0; | 139 | return 0; |
140 | } | 140 | } |
141 | 141 | ||
142 | static const struct mm_walk_ops subpage_walk_ops = { | ||
143 | .pmd_entry = subpage_walk_pmd_entry, | ||
144 | }; | ||
145 | |||
142 | static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr, | 146 | static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr, |
143 | unsigned long len) | 147 | unsigned long len) |
144 | { | 148 | { |
145 | struct vm_area_struct *vma; | 149 | struct vm_area_struct *vma; |
146 | struct mm_walk subpage_proto_walk = { | ||
147 | .mm = mm, | ||
148 | .pmd_entry = subpage_walk_pmd_entry, | ||
149 | }; | ||
150 | 150 | ||
151 | /* | 151 | /* |
152 | * We don't try too hard, we just mark all the vma in that range | 152 | * We don't try too hard, we just mark all the vma in that range |
@@ -163,7 +163,7 @@ static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr, | |||
163 | if (vma->vm_start >= (addr + len)) | 163 | if (vma->vm_start >= (addr + len)) |
164 | break; | 164 | break; |
165 | vma->vm_flags |= VM_NOHUGEPAGE; | 165 | vma->vm_flags |= VM_NOHUGEPAGE; |
166 | walk_page_vma(vma, &subpage_proto_walk); | 166 | walk_page_vma(vma, &subpage_walk_ops, NULL); |
167 | vma = vma->vm_next; | 167 | vma = vma->vm_next; |
168 | } | 168 | } |
169 | } | 169 | } |
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index cf80feae970d..bd78d504fdad 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -2521,13 +2521,9 @@ static int __zap_zero_pages(pmd_t *pmd, unsigned long start, | |||
2521 | return 0; | 2521 | return 0; |
2522 | } | 2522 | } |
2523 | 2523 | ||
2524 | static inline void zap_zero_pages(struct mm_struct *mm) | 2524 | static const struct mm_walk_ops zap_zero_walk_ops = { |
2525 | { | 2525 | .pmd_entry = __zap_zero_pages, |
2526 | struct mm_walk walk = { .pmd_entry = __zap_zero_pages }; | 2526 | }; |
2527 | |||
2528 | walk.mm = mm; | ||
2529 | walk_page_range(0, TASK_SIZE, &walk); | ||
2530 | } | ||
2531 | 2527 | ||
2532 | /* | 2528 | /* |
2533 | * switch on pgstes for its userspace process (for kvm) | 2529 | * switch on pgstes for its userspace process (for kvm) |
@@ -2546,7 +2542,7 @@ int s390_enable_sie(void) | |||
2546 | mm->context.has_pgste = 1; | 2542 | mm->context.has_pgste = 1; |
2547 | /* split thp mappings and disable thp for future mappings */ | 2543 | /* split thp mappings and disable thp for future mappings */ |
2548 | thp_split_mm(mm); | 2544 | thp_split_mm(mm); |
2549 | zap_zero_pages(mm); | 2545 | walk_page_range(mm, 0, TASK_SIZE, &zap_zero_walk_ops, NULL); |
2550 | up_write(&mm->mmap_sem); | 2546 | up_write(&mm->mmap_sem); |
2551 | return 0; | 2547 | return 0; |
2552 | } | 2548 | } |
@@ -2589,12 +2585,13 @@ static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr, | |||
2589 | return 0; | 2585 | return 0; |
2590 | } | 2586 | } |
2591 | 2587 | ||
2588 | static const struct mm_walk_ops enable_skey_walk_ops = { | ||
2589 | .hugetlb_entry = __s390_enable_skey_hugetlb, | ||
2590 | .pte_entry = __s390_enable_skey_pte, | ||
2591 | }; | ||
2592 | |||
2592 | int s390_enable_skey(void) | 2593 | int s390_enable_skey(void) |
2593 | { | 2594 | { |
2594 | struct mm_walk walk = { | ||
2595 | .hugetlb_entry = __s390_enable_skey_hugetlb, | ||
2596 | .pte_entry = __s390_enable_skey_pte, | ||
2597 | }; | ||
2598 | struct mm_struct *mm = current->mm; | 2595 | struct mm_struct *mm = current->mm; |
2599 | struct vm_area_struct *vma; | 2596 | struct vm_area_struct *vma; |
2600 | int rc = 0; | 2597 | int rc = 0; |
@@ -2614,8 +2611,7 @@ int s390_enable_skey(void) | |||
2614 | } | 2611 | } |
2615 | mm->def_flags &= ~VM_MERGEABLE; | 2612 | mm->def_flags &= ~VM_MERGEABLE; |
2616 | 2613 | ||
2617 | walk.mm = mm; | 2614 | walk_page_range(mm, 0, TASK_SIZE, &enable_skey_walk_ops, NULL); |
2618 | walk_page_range(0, TASK_SIZE, &walk); | ||
2619 | 2615 | ||
2620 | out_up: | 2616 | out_up: |
2621 | up_write(&mm->mmap_sem); | 2617 | up_write(&mm->mmap_sem); |
@@ -2633,13 +2629,14 @@ static int __s390_reset_cmma(pte_t *pte, unsigned long addr, | |||
2633 | return 0; | 2629 | return 0; |
2634 | } | 2630 | } |
2635 | 2631 | ||
2632 | static const struct mm_walk_ops reset_cmma_walk_ops = { | ||
2633 | .pte_entry = __s390_reset_cmma, | ||
2634 | }; | ||
2635 | |||
2636 | void s390_reset_cmma(struct mm_struct *mm) | 2636 | void s390_reset_cmma(struct mm_struct *mm) |
2637 | { | 2637 | { |
2638 | struct mm_walk walk = { .pte_entry = __s390_reset_cmma }; | ||
2639 | |||
2640 | down_write(&mm->mmap_sem); | 2638 | down_write(&mm->mmap_sem); |
2641 | walk.mm = mm; | 2639 | walk_page_range(mm, 0, TASK_SIZE, &reset_cmma_walk_ops, NULL); |
2642 | walk_page_range(0, TASK_SIZE, &walk); | ||
2643 | up_write(&mm->mmap_sem); | 2640 | up_write(&mm->mmap_sem); |
2644 | } | 2641 | } |
2645 | EXPORT_SYMBOL_GPL(s390_reset_cmma); | 2642 | EXPORT_SYMBOL_GPL(s390_reset_cmma); |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 8857da830b86..bf43d1d60059 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -513,7 +513,9 @@ static int smaps_pte_hole(unsigned long addr, unsigned long end, | |||
513 | 513 | ||
514 | return 0; | 514 | return 0; |
515 | } | 515 | } |
516 | #endif | 516 | #else |
517 | #define smaps_pte_hole NULL | ||
518 | #endif /* CONFIG_SHMEM */ | ||
517 | 519 | ||
518 | static void smaps_pte_entry(pte_t *pte, unsigned long addr, | 520 | static void smaps_pte_entry(pte_t *pte, unsigned long addr, |
519 | struct mm_walk *walk) | 521 | struct mm_walk *walk) |
@@ -729,21 +731,24 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask, | |||
729 | } | 731 | } |
730 | return 0; | 732 | return 0; |
731 | } | 733 | } |
734 | #else | ||
735 | #define smaps_hugetlb_range NULL | ||
732 | #endif /* HUGETLB_PAGE */ | 736 | #endif /* HUGETLB_PAGE */ |
733 | 737 | ||
738 | static const struct mm_walk_ops smaps_walk_ops = { | ||
739 | .pmd_entry = smaps_pte_range, | ||
740 | .hugetlb_entry = smaps_hugetlb_range, | ||
741 | }; | ||
742 | |||
743 | static const struct mm_walk_ops smaps_shmem_walk_ops = { | ||
744 | .pmd_entry = smaps_pte_range, | ||
745 | .hugetlb_entry = smaps_hugetlb_range, | ||
746 | .pte_hole = smaps_pte_hole, | ||
747 | }; | ||
748 | |||
734 | static void smap_gather_stats(struct vm_area_struct *vma, | 749 | static void smap_gather_stats(struct vm_area_struct *vma, |
735 | struct mem_size_stats *mss) | 750 | struct mem_size_stats *mss) |
736 | { | 751 | { |
737 | struct mm_walk smaps_walk = { | ||
738 | .pmd_entry = smaps_pte_range, | ||
739 | #ifdef CONFIG_HUGETLB_PAGE | ||
740 | .hugetlb_entry = smaps_hugetlb_range, | ||
741 | #endif | ||
742 | .mm = vma->vm_mm, | ||
743 | }; | ||
744 | |||
745 | smaps_walk.private = mss; | ||
746 | |||
747 | #ifdef CONFIG_SHMEM | 752 | #ifdef CONFIG_SHMEM |
748 | /* In case of smaps_rollup, reset the value from previous vma */ | 753 | /* In case of smaps_rollup, reset the value from previous vma */ |
749 | mss->check_shmem_swap = false; | 754 | mss->check_shmem_swap = false; |
@@ -765,12 +770,13 @@ static void smap_gather_stats(struct vm_area_struct *vma, | |||
765 | mss->swap += shmem_swapped; | 770 | mss->swap += shmem_swapped; |
766 | } else { | 771 | } else { |
767 | mss->check_shmem_swap = true; | 772 | mss->check_shmem_swap = true; |
768 | smaps_walk.pte_hole = smaps_pte_hole; | 773 | walk_page_vma(vma, &smaps_shmem_walk_ops, mss); |
774 | return; | ||
769 | } | 775 | } |
770 | } | 776 | } |
771 | #endif | 777 | #endif |
772 | /* mmap_sem is held in m_start */ | 778 | /* mmap_sem is held in m_start */ |
773 | walk_page_vma(vma, &smaps_walk); | 779 | walk_page_vma(vma, &smaps_walk_ops, mss); |
774 | } | 780 | } |
775 | 781 | ||
776 | #define SEQ_PUT_DEC(str, val) \ | 782 | #define SEQ_PUT_DEC(str, val) \ |
@@ -1118,6 +1124,11 @@ static int clear_refs_test_walk(unsigned long start, unsigned long end, | |||
1118 | return 0; | 1124 | return 0; |
1119 | } | 1125 | } |
1120 | 1126 | ||
1127 | static const struct mm_walk_ops clear_refs_walk_ops = { | ||
1128 | .pmd_entry = clear_refs_pte_range, | ||
1129 | .test_walk = clear_refs_test_walk, | ||
1130 | }; | ||
1131 | |||
1121 | static ssize_t clear_refs_write(struct file *file, const char __user *buf, | 1132 | static ssize_t clear_refs_write(struct file *file, const char __user *buf, |
1122 | size_t count, loff_t *ppos) | 1133 | size_t count, loff_t *ppos) |
1123 | { | 1134 | { |
@@ -1151,12 +1162,6 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, | |||
1151 | struct clear_refs_private cp = { | 1162 | struct clear_refs_private cp = { |
1152 | .type = type, | 1163 | .type = type, |
1153 | }; | 1164 | }; |
1154 | struct mm_walk clear_refs_walk = { | ||
1155 | .pmd_entry = clear_refs_pte_range, | ||
1156 | .test_walk = clear_refs_test_walk, | ||
1157 | .mm = mm, | ||
1158 | .private = &cp, | ||
1159 | }; | ||
1160 | 1165 | ||
1161 | if (type == CLEAR_REFS_MM_HIWATER_RSS) { | 1166 | if (type == CLEAR_REFS_MM_HIWATER_RSS) { |
1162 | if (down_write_killable(&mm->mmap_sem)) { | 1167 | if (down_write_killable(&mm->mmap_sem)) { |
@@ -1217,7 +1222,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, | |||
1217 | 0, NULL, mm, 0, -1UL); | 1222 | 0, NULL, mm, 0, -1UL); |
1218 | mmu_notifier_invalidate_range_start(&range); | 1223 | mmu_notifier_invalidate_range_start(&range); |
1219 | } | 1224 | } |
1220 | walk_page_range(0, mm->highest_vm_end, &clear_refs_walk); | 1225 | walk_page_range(mm, 0, mm->highest_vm_end, &clear_refs_walk_ops, |
1226 | &cp); | ||
1221 | if (type == CLEAR_REFS_SOFT_DIRTY) | 1227 | if (type == CLEAR_REFS_SOFT_DIRTY) |
1222 | mmu_notifier_invalidate_range_end(&range); | 1228 | mmu_notifier_invalidate_range_end(&range); |
1223 | tlb_finish_mmu(&tlb, 0, -1); | 1229 | tlb_finish_mmu(&tlb, 0, -1); |
@@ -1489,8 +1495,16 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask, | |||
1489 | 1495 | ||
1490 | return err; | 1496 | return err; |
1491 | } | 1497 | } |
1498 | #else | ||
1499 | #define pagemap_hugetlb_range NULL | ||
1492 | #endif /* HUGETLB_PAGE */ | 1500 | #endif /* HUGETLB_PAGE */ |
1493 | 1501 | ||
1502 | static const struct mm_walk_ops pagemap_ops = { | ||
1503 | .pmd_entry = pagemap_pmd_range, | ||
1504 | .pte_hole = pagemap_pte_hole, | ||
1505 | .hugetlb_entry = pagemap_hugetlb_range, | ||
1506 | }; | ||
1507 | |||
1494 | /* | 1508 | /* |
1495 | * /proc/pid/pagemap - an array mapping virtual pages to pfns | 1509 | * /proc/pid/pagemap - an array mapping virtual pages to pfns |
1496 | * | 1510 | * |
@@ -1522,7 +1536,6 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, | |||
1522 | { | 1536 | { |
1523 | struct mm_struct *mm = file->private_data; | 1537 | struct mm_struct *mm = file->private_data; |
1524 | struct pagemapread pm; | 1538 | struct pagemapread pm; |
1525 | struct mm_walk pagemap_walk = {}; | ||
1526 | unsigned long src; | 1539 | unsigned long src; |
1527 | unsigned long svpfn; | 1540 | unsigned long svpfn; |
1528 | unsigned long start_vaddr; | 1541 | unsigned long start_vaddr; |
@@ -1550,14 +1563,6 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, | |||
1550 | if (!pm.buffer) | 1563 | if (!pm.buffer) |
1551 | goto out_mm; | 1564 | goto out_mm; |
1552 | 1565 | ||
1553 | pagemap_walk.pmd_entry = pagemap_pmd_range; | ||
1554 | pagemap_walk.pte_hole = pagemap_pte_hole; | ||
1555 | #ifdef CONFIG_HUGETLB_PAGE | ||
1556 | pagemap_walk.hugetlb_entry = pagemap_hugetlb_range; | ||
1557 | #endif | ||
1558 | pagemap_walk.mm = mm; | ||
1559 | pagemap_walk.private = &pm; | ||
1560 | |||
1561 | src = *ppos; | 1566 | src = *ppos; |
1562 | svpfn = src / PM_ENTRY_BYTES; | 1567 | svpfn = src / PM_ENTRY_BYTES; |
1563 | start_vaddr = svpfn << PAGE_SHIFT; | 1568 | start_vaddr = svpfn << PAGE_SHIFT; |
@@ -1586,7 +1591,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, | |||
1586 | ret = down_read_killable(&mm->mmap_sem); | 1591 | ret = down_read_killable(&mm->mmap_sem); |
1587 | if (ret) | 1592 | if (ret) |
1588 | goto out_free; | 1593 | goto out_free; |
1589 | ret = walk_page_range(start_vaddr, end, &pagemap_walk); | 1594 | ret = walk_page_range(mm, start_vaddr, end, &pagemap_ops, &pm); |
1590 | up_read(&mm->mmap_sem); | 1595 | up_read(&mm->mmap_sem); |
1591 | start_vaddr = end; | 1596 | start_vaddr = end; |
1592 | 1597 | ||
@@ -1798,6 +1803,11 @@ static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask, | |||
1798 | } | 1803 | } |
1799 | #endif | 1804 | #endif |
1800 | 1805 | ||
1806 | static const struct mm_walk_ops show_numa_ops = { | ||
1807 | .hugetlb_entry = gather_hugetlb_stats, | ||
1808 | .pmd_entry = gather_pte_stats, | ||
1809 | }; | ||
1810 | |||
1801 | /* | 1811 | /* |
1802 | * Display pages allocated per node and memory policy via /proc. | 1812 | * Display pages allocated per node and memory policy via /proc. |
1803 | */ | 1813 | */ |
@@ -1809,12 +1819,6 @@ static int show_numa_map(struct seq_file *m, void *v) | |||
1809 | struct numa_maps *md = &numa_priv->md; | 1819 | struct numa_maps *md = &numa_priv->md; |
1810 | struct file *file = vma->vm_file; | 1820 | struct file *file = vma->vm_file; |
1811 | struct mm_struct *mm = vma->vm_mm; | 1821 | struct mm_struct *mm = vma->vm_mm; |
1812 | struct mm_walk walk = { | ||
1813 | .hugetlb_entry = gather_hugetlb_stats, | ||
1814 | .pmd_entry = gather_pte_stats, | ||
1815 | .private = md, | ||
1816 | .mm = mm, | ||
1817 | }; | ||
1818 | struct mempolicy *pol; | 1822 | struct mempolicy *pol; |
1819 | char buffer[64]; | 1823 | char buffer[64]; |
1820 | int nid; | 1824 | int nid; |
@@ -1848,7 +1852,7 @@ static int show_numa_map(struct seq_file *m, void *v) | |||
1848 | seq_puts(m, " huge"); | 1852 | seq_puts(m, " huge"); |
1849 | 1853 | ||
1850 | /* mmap_sem is held by m_start */ | 1854 | /* mmap_sem is held by m_start */ |
1851 | walk_page_vma(vma, &walk); | 1855 | walk_page_vma(vma, &show_numa_ops, md); |
1852 | 1856 | ||
1853 | if (!md->pages) | 1857 | if (!md->pages) |
1854 | goto out; | 1858 | goto out; |
diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h
index df278a94086d..bddd9759bab9 100644
--- a/include/linux/pagewalk.h
+++ b/include/linux/pagewalk.h
@@ -4,31 +4,28 @@ | |||
4 | 4 | ||
5 | #include <linux/mm.h> | 5 | #include <linux/mm.h> |
6 | 6 | ||
7 | struct mm_walk; | ||
8 | |||
7 | /** | 9 | /** |
8 | * mm_walk - callbacks for walk_page_range | 10 | * mm_walk_ops - callbacks for walk_page_range |
9 | * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry | 11 | * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry |
10 | * this handler should only handle pud_trans_huge() puds. | 12 | * this handler should only handle pud_trans_huge() puds. |
11 | * the pmd_entry or pte_entry callbacks will be used for | 13 | * the pmd_entry or pte_entry callbacks will be used for |
12 | * regular PUDs. | 14 | * regular PUDs. |
13 | * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry | 15 | * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry |
14 | * this handler is required to be able to handle | 16 | * this handler is required to be able to handle |
15 | * pmd_trans_huge() pmds. They may simply choose to | 17 | * pmd_trans_huge() pmds. They may simply choose to |
16 | * split_huge_page() instead of handling it explicitly. | 18 | * split_huge_page() instead of handling it explicitly. |
17 | * @pte_entry: if set, called for each non-empty PTE (4th-level) entry | 19 | * @pte_entry: if set, called for each non-empty PTE (4th-level) entry |
18 | * @pte_hole: if set, called for each hole at all levels | 20 | * @pte_hole: if set, called for each hole at all levels |
19 | * @hugetlb_entry: if set, called for each hugetlb entry | 21 | * @hugetlb_entry: if set, called for each hugetlb entry |
20 | * @test_walk: caller specific callback function to determine whether | 22 | * @test_walk: caller specific callback function to determine whether |
21 | * we walk over the current vma or not. Returning 0 | 23 | * we walk over the current vma or not. Returning 0 means |
22 | * value means "do page table walk over the current vma," | 24 | * "do page table walk over the current vma", returning |
23 | * and a negative one means "abort current page table walk | 25 | * a negative value means "abort current page table walk |
24 | * right now." 1 means "skip the current vma." | 26 | * right now" and returning 1 means "skip the current vma" |
25 | * @mm: mm_struct representing the target process of page table walk | ||
26 | * @vma: vma currently walked (NULL if walking outside vmas) | ||
27 | * @private: private data for callbacks' usage | ||
28 | * | ||
29 | * (see the comment on walk_page_range() for more details) | ||
30 | */ | 27 | */ |
31 | struct mm_walk { | 28 | struct mm_walk_ops { |
32 | int (*pud_entry)(pud_t *pud, unsigned long addr, | 29 | int (*pud_entry)(pud_t *pud, unsigned long addr, |
33 | unsigned long next, struct mm_walk *walk); | 30 | unsigned long next, struct mm_walk *walk); |
34 | int (*pmd_entry)(pmd_t *pmd, unsigned long addr, | 31 | int (*pmd_entry)(pmd_t *pmd, unsigned long addr, |
@@ -42,13 +39,28 @@ struct mm_walk { | |||
42 | struct mm_walk *walk); | 39 | struct mm_walk *walk); |
43 | int (*test_walk)(unsigned long addr, unsigned long next, | 40 | int (*test_walk)(unsigned long addr, unsigned long next, |
44 | struct mm_walk *walk); | 41 | struct mm_walk *walk); |
42 | }; | ||
43 | |||
44 | /** | ||
45 | * mm_walk - walk_page_range data | ||
46 | * @ops: operation to call during the walk | ||
47 | * @mm: mm_struct representing the target process of page table walk | ||
48 | * @vma: vma currently walked (NULL if walking outside vmas) | ||
49 | * @private: private data for callbacks' usage | ||
50 | * | ||
51 | * (see the comment on walk_page_range() for more details) | ||
52 | */ | ||
53 | struct mm_walk { | ||
54 | const struct mm_walk_ops *ops; | ||
45 | struct mm_struct *mm; | 55 | struct mm_struct *mm; |
46 | struct vm_area_struct *vma; | 56 | struct vm_area_struct *vma; |
47 | void *private; | 57 | void *private; |
48 | }; | 58 | }; |
49 | 59 | ||
50 | int walk_page_range(unsigned long addr, unsigned long end, | 60 | int walk_page_range(struct mm_struct *mm, unsigned long start, |
51 | struct mm_walk *walk); | 61 | unsigned long end, const struct mm_walk_ops *ops, |
52 | int walk_page_vma(struct vm_area_struct *vma, struct mm_walk *walk); | 62 | void *private); |
63 | int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops, | ||
64 | void *private); | ||
53 | 65 | ||
54 | #endif /* _LINUX_PAGEWALK_H */ | 66 | #endif /* _LINUX_PAGEWALK_H */ |
diff --git a/mm/hmm.c b/mm/hmm.c
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -852,6 +852,13 @@ void hmm_range_unregister(struct hmm_range *range)
852 | } | 852 | } |
853 | EXPORT_SYMBOL(hmm_range_unregister); | 853 | EXPORT_SYMBOL(hmm_range_unregister); |
854 | 854 | ||
855 | static const struct mm_walk_ops hmm_walk_ops = { | ||
856 | .pud_entry = hmm_vma_walk_pud, | ||
857 | .pmd_entry = hmm_vma_walk_pmd, | ||
858 | .pte_hole = hmm_vma_walk_hole, | ||
859 | .hugetlb_entry = hmm_vma_walk_hugetlb_entry, | ||
860 | }; | ||
861 | |||
855 | /** | 862 | /** |
856 | * hmm_range_fault - try to fault some address in a virtual address range | 863 | * hmm_range_fault - try to fault some address in a virtual address range |
857 | * @range: range being faulted | 864 | * @range: range being faulted |
@@ -887,7 +894,6 @@ long hmm_range_fault(struct hmm_range *range, unsigned int flags) | |||
887 | struct hmm_vma_walk hmm_vma_walk; | 894 | struct hmm_vma_walk hmm_vma_walk; |
888 | struct hmm *hmm = range->hmm; | 895 | struct hmm *hmm = range->hmm; |
889 | struct vm_area_struct *vma; | 896 | struct vm_area_struct *vma; |
890 | struct mm_walk mm_walk; | ||
891 | int ret; | 897 | int ret; |
892 | 898 | ||
893 | lockdep_assert_held(&hmm->mmu_notifier.mm->mmap_sem); | 899 | lockdep_assert_held(&hmm->mmu_notifier.mm->mmap_sem); |
@@ -916,21 +922,14 @@ long hmm_range_fault(struct hmm_range *range, unsigned int flags) | |||
916 | hmm_vma_walk.last = start; | 922 | hmm_vma_walk.last = start; |
917 | hmm_vma_walk.flags = flags; | 923 | hmm_vma_walk.flags = flags; |
918 | hmm_vma_walk.range = range; | 924 | hmm_vma_walk.range = range; |
919 | mm_walk.private = &hmm_vma_walk; | ||
920 | end = min(range->end, vma->vm_end); | 925 | end = min(range->end, vma->vm_end); |
921 | 926 | ||
922 | mm_walk.vma = vma; | 927 | walk_page_range(vma->vm_mm, start, end, &hmm_walk_ops, |
923 | mm_walk.mm = vma->vm_mm; | 928 | &hmm_vma_walk); |
924 | mm_walk.pte_entry = NULL; | ||
925 | mm_walk.test_walk = NULL; | ||
926 | mm_walk.hugetlb_entry = NULL; | ||
927 | mm_walk.pud_entry = hmm_vma_walk_pud; | ||
928 | mm_walk.pmd_entry = hmm_vma_walk_pmd; | ||
929 | mm_walk.pte_hole = hmm_vma_walk_hole; | ||
930 | mm_walk.hugetlb_entry = hmm_vma_walk_hugetlb_entry; | ||
931 | 929 | ||
932 | do { | 930 | do { |
933 | ret = walk_page_range(start, end, &mm_walk); | 931 | ret = walk_page_range(vma->vm_mm, start, end, |
932 | &hmm_walk_ops, &hmm_vma_walk); | ||
934 | start = hmm_vma_walk.last; | 933 | start = hmm_vma_walk.last; |
935 | 934 | ||
936 | /* Keep trying while the range is valid. */ | 935 | /* Keep trying while the range is valid. */ |
diff --git a/mm/madvise.c b/mm/madvise.c
index 80a78bb16782..afe2b015ea58 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -226,19 +226,9 @@ static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start, | |||
226 | return 0; | 226 | return 0; |
227 | } | 227 | } |
228 | 228 | ||
229 | static void force_swapin_readahead(struct vm_area_struct *vma, | 229 | static const struct mm_walk_ops swapin_walk_ops = { |
230 | unsigned long start, unsigned long end) | 230 | .pmd_entry = swapin_walk_pmd_entry, |
231 | { | 231 | }; |
232 | struct mm_walk walk = { | ||
233 | .mm = vma->vm_mm, | ||
234 | .pmd_entry = swapin_walk_pmd_entry, | ||
235 | .private = vma, | ||
236 | }; | ||
237 | |||
238 | walk_page_range(start, end, &walk); | ||
239 | |||
240 | lru_add_drain(); /* Push any new pages onto the LRU now */ | ||
241 | } | ||
242 | 232 | ||
243 | static void force_shm_swapin_readahead(struct vm_area_struct *vma, | 233 | static void force_shm_swapin_readahead(struct vm_area_struct *vma, |
244 | unsigned long start, unsigned long end, | 234 | unsigned long start, unsigned long end, |
@@ -280,7 +270,8 @@ static long madvise_willneed(struct vm_area_struct *vma, | |||
280 | *prev = vma; | 270 | *prev = vma; |
281 | #ifdef CONFIG_SWAP | 271 | #ifdef CONFIG_SWAP |
282 | if (!file) { | 272 | if (!file) { |
283 | force_swapin_readahead(vma, start, end); | 273 | walk_page_range(vma->vm_mm, start, end, &swapin_walk_ops, vma); |
274 | lru_add_drain(); /* Push any new pages onto the LRU now */ | ||
284 | return 0; | 275 | return 0; |
285 | } | 276 | } |
286 | 277 | ||
@@ -441,20 +432,9 @@ next: | |||
441 | return 0; | 432 | return 0; |
442 | } | 433 | } |
443 | 434 | ||
444 | static void madvise_free_page_range(struct mmu_gather *tlb, | 435 | static const struct mm_walk_ops madvise_free_walk_ops = { |
445 | struct vm_area_struct *vma, | 436 | .pmd_entry = madvise_free_pte_range, |
446 | unsigned long addr, unsigned long end) | 437 | }; |
447 | { | ||
448 | struct mm_walk free_walk = { | ||
449 | .pmd_entry = madvise_free_pte_range, | ||
450 | .mm = vma->vm_mm, | ||
451 | .private = tlb, | ||
452 | }; | ||
453 | |||
454 | tlb_start_vma(tlb, vma); | ||
455 | walk_page_range(addr, end, &free_walk); | ||
456 | tlb_end_vma(tlb, vma); | ||
457 | } | ||
458 | 438 | ||
459 | static int madvise_free_single_vma(struct vm_area_struct *vma, | 439 | static int madvise_free_single_vma(struct vm_area_struct *vma, |
460 | unsigned long start_addr, unsigned long end_addr) | 440 | unsigned long start_addr, unsigned long end_addr) |
@@ -481,7 +461,10 @@ static int madvise_free_single_vma(struct vm_area_struct *vma, | |||
481 | update_hiwater_rss(mm); | 461 | update_hiwater_rss(mm); |
482 | 462 | ||
483 | mmu_notifier_invalidate_range_start(&range); | 463 | mmu_notifier_invalidate_range_start(&range); |
484 | madvise_free_page_range(&tlb, vma, range.start, range.end); | 464 | tlb_start_vma(&tlb, vma); |
465 | walk_page_range(vma->vm_mm, range.start, range.end, | ||
466 | &madvise_free_walk_ops, &tlb); | ||
467 | tlb_end_vma(&tlb, vma); | ||
485 | mmu_notifier_invalidate_range_end(&range); | 468 | mmu_notifier_invalidate_range_end(&range); |
486 | tlb_finish_mmu(&tlb, range.start, range.end); | 469 | tlb_finish_mmu(&tlb, range.start, range.end); |
487 | 470 | ||
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4c3af5d71ab1..9b2516a76be2 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5283,17 +5283,16 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd, | |||
5283 | return 0; | 5283 | return 0; |
5284 | } | 5284 | } |
5285 | 5285 | ||
5286 | static const struct mm_walk_ops precharge_walk_ops = { | ||
5287 | .pmd_entry = mem_cgroup_count_precharge_pte_range, | ||
5288 | }; | ||
5289 | |||
5286 | static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm) | 5290 | static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm) |
5287 | { | 5291 | { |
5288 | unsigned long precharge; | 5292 | unsigned long precharge; |
5289 | 5293 | ||
5290 | struct mm_walk mem_cgroup_count_precharge_walk = { | ||
5291 | .pmd_entry = mem_cgroup_count_precharge_pte_range, | ||
5292 | .mm = mm, | ||
5293 | }; | ||
5294 | down_read(&mm->mmap_sem); | 5294 | down_read(&mm->mmap_sem); |
5295 | walk_page_range(0, mm->highest_vm_end, | 5295 | walk_page_range(mm, 0, mm->highest_vm_end, &precharge_walk_ops, NULL); |
5296 | &mem_cgroup_count_precharge_walk); | ||
5297 | up_read(&mm->mmap_sem); | 5296 | up_read(&mm->mmap_sem); |
5298 | 5297 | ||
5299 | precharge = mc.precharge; | 5298 | precharge = mc.precharge; |
@@ -5562,13 +5561,12 @@ put: /* get_mctgt_type() gets the page */ | |||
5562 | return ret; | 5561 | return ret; |
5563 | } | 5562 | } |
5564 | 5563 | ||
5564 | static const struct mm_walk_ops charge_walk_ops = { | ||
5565 | .pmd_entry = mem_cgroup_move_charge_pte_range, | ||
5566 | }; | ||
5567 | |||
5565 | static void mem_cgroup_move_charge(void) | 5568 | static void mem_cgroup_move_charge(void) |
5566 | { | 5569 | { |
5567 | struct mm_walk mem_cgroup_move_charge_walk = { | ||
5568 | .pmd_entry = mem_cgroup_move_charge_pte_range, | ||
5569 | .mm = mc.mm, | ||
5570 | }; | ||
5571 | |||
5572 | lru_add_drain_all(); | 5570 | lru_add_drain_all(); |
5573 | /* | 5571 | /* |
5574 | * Signal lock_page_memcg() to take the memcg's move_lock | 5572 | * Signal lock_page_memcg() to take the memcg's move_lock |
@@ -5594,7 +5592,8 @@ retry: | |||
5594 | * When we have consumed all precharges and failed in doing | 5592 | * When we have consumed all precharges and failed in doing |
5595 | * additional charge, the page walk just aborts. | 5593 | * additional charge, the page walk just aborts. |
5596 | */ | 5594 | */ |
5597 | walk_page_range(0, mc.mm->highest_vm_end, &mem_cgroup_move_charge_walk); | 5595 | walk_page_range(mc.mm, 0, mc.mm->highest_vm_end, &charge_walk_ops, |
5596 | NULL); | ||
5598 | 5597 | ||
5599 | up_read(&mc.mm->mmap_sem); | 5598 | up_read(&mc.mm->mmap_sem); |
5600 | atomic_dec(&mc.from->moving_account); | 5599 | atomic_dec(&mc.from->moving_account); |
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 3a96def1e796..f000771558d8 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -655,6 +655,12 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end, | |||
655 | return 1; | 655 | return 1; |
656 | } | 656 | } |
657 | 657 | ||
658 | static const struct mm_walk_ops queue_pages_walk_ops = { | ||
659 | .hugetlb_entry = queue_pages_hugetlb, | ||
660 | .pmd_entry = queue_pages_pte_range, | ||
661 | .test_walk = queue_pages_test_walk, | ||
662 | }; | ||
663 | |||
658 | /* | 664 | /* |
659 | * Walk through page tables and collect pages to be migrated. | 665 | * Walk through page tables and collect pages to be migrated. |
660 | * | 666 | * |
@@ -679,15 +685,8 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end, | |||
679 | .nmask = nodes, | 685 | .nmask = nodes, |
680 | .prev = NULL, | 686 | .prev = NULL, |
681 | }; | 687 | }; |
682 | struct mm_walk queue_pages_walk = { | ||
683 | .hugetlb_entry = queue_pages_hugetlb, | ||
684 | .pmd_entry = queue_pages_pte_range, | ||
685 | .test_walk = queue_pages_test_walk, | ||
686 | .mm = mm, | ||
687 | .private = &qp, | ||
688 | }; | ||
689 | 688 | ||
690 | return walk_page_range(start, end, &queue_pages_walk); | 689 | return walk_page_range(mm, start, end, &queue_pages_walk_ops, &qp); |
691 | } | 690 | } |
692 | 691 | ||
693 | /* | 692 | /* |
diff --git a/mm/migrate.c b/mm/migrate.c
index c9c73a35aca7..9f4ed4e985c1 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2320,6 +2320,11 @@ next: | |||
2320 | return 0; | 2320 | return 0; |
2321 | } | 2321 | } |
2322 | 2322 | ||
2323 | static const struct mm_walk_ops migrate_vma_walk_ops = { | ||
2324 | .pmd_entry = migrate_vma_collect_pmd, | ||
2325 | .pte_hole = migrate_vma_collect_hole, | ||
2326 | }; | ||
2327 | |||
2323 | /* | 2328 | /* |
2324 | * migrate_vma_collect() - collect pages over a range of virtual addresses | 2329 | * migrate_vma_collect() - collect pages over a range of virtual addresses |
2325 | * @migrate: migrate struct containing all migration information | 2330 | * @migrate: migrate struct containing all migration information |
@@ -2331,21 +2336,15 @@ next: | |||
2331 | static void migrate_vma_collect(struct migrate_vma *migrate) | 2336 | static void migrate_vma_collect(struct migrate_vma *migrate) |
2332 | { | 2337 | { |
2333 | struct mmu_notifier_range range; | 2338 | struct mmu_notifier_range range; |
2334 | struct mm_walk mm_walk = { | ||
2335 | .pmd_entry = migrate_vma_collect_pmd, | ||
2336 | .pte_hole = migrate_vma_collect_hole, | ||
2337 | .vma = migrate->vma, | ||
2338 | .mm = migrate->vma->vm_mm, | ||
2339 | .private = migrate, | ||
2340 | }; | ||
2341 | 2339 | ||
2342 | mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, mm_walk.mm, | 2340 | mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, |
2343 | migrate->start, | 2341 | migrate->vma->vm_mm, migrate->start, migrate->end); |
2344 | migrate->end); | ||
2345 | mmu_notifier_invalidate_range_start(&range); | 2342 | mmu_notifier_invalidate_range_start(&range); |
2346 | walk_page_range(migrate->start, migrate->end, &mm_walk); | ||
2347 | mmu_notifier_invalidate_range_end(&range); | ||
2348 | 2343 | ||
2344 | walk_page_range(migrate->vma->vm_mm, migrate->start, migrate->end, | ||
2345 | &migrate_vma_walk_ops, migrate); | ||
2346 | |||
2347 | mmu_notifier_invalidate_range_end(&range); | ||
2349 | migrate->end = migrate->start + (migrate->npages << PAGE_SHIFT); | 2348 | migrate->end = migrate->start + (migrate->npages << PAGE_SHIFT); |
2350 | } | 2349 | } |
2351 | 2350 | ||
diff --git a/mm/mincore.c b/mm/mincore.c
index 3b051b6ab3fe..f9a9dbe8cd33 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -193,6 +193,12 @@ static inline bool can_do_mincore(struct vm_area_struct *vma) | |||
193 | inode_permission(file_inode(vma->vm_file), MAY_WRITE) == 0; | 193 | inode_permission(file_inode(vma->vm_file), MAY_WRITE) == 0; |
194 | } | 194 | } |
195 | 195 | ||
196 | static const struct mm_walk_ops mincore_walk_ops = { | ||
197 | .pmd_entry = mincore_pte_range, | ||
198 | .pte_hole = mincore_unmapped_range, | ||
199 | .hugetlb_entry = mincore_hugetlb, | ||
200 | }; | ||
201 | |||
196 | /* | 202 | /* |
197 | * Do a chunk of "sys_mincore()". We've already checked | 203 | * Do a chunk of "sys_mincore()". We've already checked |
198 | * all the arguments, we hold the mmap semaphore: we should | 204 | * all the arguments, we hold the mmap semaphore: we should |
@@ -203,12 +209,6 @@ static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *v | |||
203 | struct vm_area_struct *vma; | 209 | struct vm_area_struct *vma; |
204 | unsigned long end; | 210 | unsigned long end; |
205 | int err; | 211 | int err; |
206 | struct mm_walk mincore_walk = { | ||
207 | .pmd_entry = mincore_pte_range, | ||
208 | .pte_hole = mincore_unmapped_range, | ||
209 | .hugetlb_entry = mincore_hugetlb, | ||
210 | .private = vec, | ||
211 | }; | ||
212 | 212 | ||
213 | vma = find_vma(current->mm, addr); | 213 | vma = find_vma(current->mm, addr); |
214 | if (!vma || addr < vma->vm_start) | 214 | if (!vma || addr < vma->vm_start) |
@@ -219,8 +219,7 @@ static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *v | |||
219 | memset(vec, 1, pages); | 219 | memset(vec, 1, pages); |
220 | return pages; | 220 | return pages; |
221 | } | 221 | } |
222 | mincore_walk.mm = vma->vm_mm; | 222 | err = walk_page_range(vma->vm_mm, addr, end, &mincore_walk_ops, vec); |
223 | err = walk_page_range(addr, end, &mincore_walk); | ||
224 | if (err < 0) | 223 | if (err < 0) |
225 | return err; | 224 | return err; |
226 | return (end - addr) >> PAGE_SHIFT; | 225 | return (end - addr) >> PAGE_SHIFT; |
diff --git a/mm/mprotect.c b/mm/mprotect.c
index cc73318dbc25..675e5d34a507 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -329,20 +329,11 @@ static int prot_none_test(unsigned long addr, unsigned long next, | |||
329 | return 0; | 329 | return 0; |
330 | } | 330 | } |
331 | 331 | ||
332 | static int prot_none_walk(struct vm_area_struct *vma, unsigned long start, | 332 | static const struct mm_walk_ops prot_none_walk_ops = { |
333 | unsigned long end, unsigned long newflags) | 333 | .pte_entry = prot_none_pte_entry, |
334 | { | 334 | .hugetlb_entry = prot_none_hugetlb_entry, |
335 | pgprot_t new_pgprot = vm_get_page_prot(newflags); | 335 | .test_walk = prot_none_test, |
336 | struct mm_walk prot_none_walk = { | 336 | }; |
337 | .pte_entry = prot_none_pte_entry, | ||
338 | .hugetlb_entry = prot_none_hugetlb_entry, | ||
339 | .test_walk = prot_none_test, | ||
340 | .mm = current->mm, | ||
341 | .private = &new_pgprot, | ||
342 | }; | ||
343 | |||
344 | return walk_page_range(start, end, &prot_none_walk); | ||
345 | } | ||
346 | 337 | ||
347 | int | 338 | int |
348 | mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, | 339 | mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, |
@@ -369,7 +360,10 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, | |||
369 | if (arch_has_pfn_modify_check() && | 360 | if (arch_has_pfn_modify_check() && |
370 | (vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) && | 361 | (vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) && |
371 | (newflags & (VM_READ|VM_WRITE|VM_EXEC)) == 0) { | 362 | (newflags & (VM_READ|VM_WRITE|VM_EXEC)) == 0) { |
372 | error = prot_none_walk(vma, start, end, newflags); | 363 | pgprot_t new_pgprot = vm_get_page_prot(newflags); |
364 | |||
365 | error = walk_page_range(current->mm, start, end, | ||
366 | &prot_none_walk_ops, &new_pgprot); | ||
373 | if (error) | 367 | if (error) |
374 | return error; | 368 | return error; |
375 | } | 369 | } |
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 8a92a961a2ee..b8762b673a3d 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -9,10 +9,11 @@ static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
9 | { | 9 | { |
10 | pte_t *pte; | 10 | pte_t *pte; |
11 | int err = 0; | 11 | int err = 0; |
12 | const struct mm_walk_ops *ops = walk->ops; | ||
12 | 13 | ||
13 | pte = pte_offset_map(pmd, addr); | 14 | pte = pte_offset_map(pmd, addr); |
14 | for (;;) { | 15 | for (;;) { |
15 | err = walk->pte_entry(pte, addr, addr + PAGE_SIZE, walk); | 16 | err = ops->pte_entry(pte, addr, addr + PAGE_SIZE, walk); |
16 | if (err) | 17 | if (err) |
17 | break; | 18 | break; |
18 | addr += PAGE_SIZE; | 19 | addr += PAGE_SIZE; |
@@ -30,6 +31,7 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, | |||
30 | { | 31 | { |
31 | pmd_t *pmd; | 32 | pmd_t *pmd; |
32 | unsigned long next; | 33 | unsigned long next; |
34 | const struct mm_walk_ops *ops = walk->ops; | ||
33 | int err = 0; | 35 | int err = 0; |
34 | 36 | ||
35 | pmd = pmd_offset(pud, addr); | 37 | pmd = pmd_offset(pud, addr); |
@@ -37,8 +39,8 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, | |||
37 | again: | 39 | again: |
38 | next = pmd_addr_end(addr, end); | 40 | next = pmd_addr_end(addr, end); |
39 | if (pmd_none(*pmd) || !walk->vma) { | 41 | if (pmd_none(*pmd) || !walk->vma) { |
40 | if (walk->pte_hole) | 42 | if (ops->pte_hole) |
41 | err = walk->pte_hole(addr, next, walk); | 43 | err = ops->pte_hole(addr, next, walk); |
42 | if (err) | 44 | if (err) |
43 | break; | 45 | break; |
44 | continue; | 46 | continue; |
@@ -47,8 +49,8 @@ again: | |||
47 | * This implies that each ->pmd_entry() handler | 49 | * This implies that each ->pmd_entry() handler |
48 | * needs to know about pmd_trans_huge() pmds | 50 | * needs to know about pmd_trans_huge() pmds |
49 | */ | 51 | */ |
50 | if (walk->pmd_entry) | 52 | if (ops->pmd_entry) |
51 | err = walk->pmd_entry(pmd, addr, next, walk); | 53 | err = ops->pmd_entry(pmd, addr, next, walk); |
52 | if (err) | 54 | if (err) |
53 | break; | 55 | break; |
54 | 56 | ||
@@ -56,7 +58,7 @@ again: | |||
56 | * Check this here so we only break down trans_huge | 58 | * Check this here so we only break down trans_huge |
57 | * pages when we _need_ to | 59 | * pages when we _need_ to |
58 | */ | 60 | */ |
59 | if (!walk->pte_entry) | 61 | if (!ops->pte_entry) |
60 | continue; | 62 | continue; |
61 | 63 | ||
62 | split_huge_pmd(walk->vma, pmd, addr); | 64 | split_huge_pmd(walk->vma, pmd, addr); |
@@ -75,6 +77,7 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, | |||
75 | { | 77 | { |
76 | pud_t *pud; | 78 | pud_t *pud; |
77 | unsigned long next; | 79 | unsigned long next; |
80 | const struct mm_walk_ops *ops = walk->ops; | ||
78 | int err = 0; | 81 | int err = 0; |
79 | 82 | ||
80 | pud = pud_offset(p4d, addr); | 83 | pud = pud_offset(p4d, addr); |
@@ -82,18 +85,18 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, | |||
82 | again: | 85 | again: |
83 | next = pud_addr_end(addr, end); | 86 | next = pud_addr_end(addr, end); |
84 | if (pud_none(*pud) || !walk->vma) { | 87 | if (pud_none(*pud) || !walk->vma) { |
85 | if (walk->pte_hole) | 88 | if (ops->pte_hole) |
86 | err = walk->pte_hole(addr, next, walk); | 89 | err = ops->pte_hole(addr, next, walk); |
87 | if (err) | 90 | if (err) |
88 | break; | 91 | break; |
89 | continue; | 92 | continue; |
90 | } | 93 | } |
91 | 94 | ||
92 | if (walk->pud_entry) { | 95 | if (ops->pud_entry) { |
93 | spinlock_t *ptl = pud_trans_huge_lock(pud, walk->vma); | 96 | spinlock_t *ptl = pud_trans_huge_lock(pud, walk->vma); |
94 | 97 | ||
95 | if (ptl) { | 98 | if (ptl) { |
96 | err = walk->pud_entry(pud, addr, next, walk); | 99 | err = ops->pud_entry(pud, addr, next, walk); |
97 | spin_unlock(ptl); | 100 | spin_unlock(ptl); |
98 | if (err) | 101 | if (err) |
99 | break; | 102 | break; |
@@ -105,7 +108,7 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, | |||
105 | if (pud_none(*pud)) | 108 | if (pud_none(*pud)) |
106 | goto again; | 109 | goto again; |
107 | 110 | ||
108 | if (walk->pmd_entry || walk->pte_entry) | 111 | if (ops->pmd_entry || ops->pte_entry) |
109 | err = walk_pmd_range(pud, addr, next, walk); | 112 | err = walk_pmd_range(pud, addr, next, walk); |
110 | if (err) | 113 | if (err) |
111 | break; | 114 | break; |
@@ -119,19 +122,20 @@ static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end, | |||
119 | { | 122 | { |
120 | p4d_t *p4d; | 123 | p4d_t *p4d; |
121 | unsigned long next; | 124 | unsigned long next; |
125 | const struct mm_walk_ops *ops = walk->ops; | ||
122 | int err = 0; | 126 | int err = 0; |
123 | 127 | ||
124 | p4d = p4d_offset(pgd, addr); | 128 | p4d = p4d_offset(pgd, addr); |
125 | do { | 129 | do { |
126 | next = p4d_addr_end(addr, end); | 130 | next = p4d_addr_end(addr, end); |
127 | if (p4d_none_or_clear_bad(p4d)) { | 131 | if (p4d_none_or_clear_bad(p4d)) { |
128 | if (walk->pte_hole) | 132 | if (ops->pte_hole) |
129 | err = walk->pte_hole(addr, next, walk); | 133 | err = ops->pte_hole(addr, next, walk); |
130 | if (err) | 134 | if (err) |
131 | break; | 135 | break; |
132 | continue; | 136 | continue; |
133 | } | 137 | } |
134 | if (walk->pmd_entry || walk->pte_entry) | 138 | if (ops->pmd_entry || ops->pte_entry) |
135 | err = walk_pud_range(p4d, addr, next, walk); | 139 | err = walk_pud_range(p4d, addr, next, walk); |
136 | if (err) | 140 | if (err) |
137 | break; | 141 | break; |
@@ -145,19 +149,20 @@ static int walk_pgd_range(unsigned long addr, unsigned long end, | |||
145 | { | 149 | { |
146 | pgd_t *pgd; | 150 | pgd_t *pgd; |
147 | unsigned long next; | 151 | unsigned long next; |
152 | const struct mm_walk_ops *ops = walk->ops; | ||
148 | int err = 0; | 153 | int err = 0; |
149 | 154 | ||
150 | pgd = pgd_offset(walk->mm, addr); | 155 | pgd = pgd_offset(walk->mm, addr); |
151 | do { | 156 | do { |
152 | next = pgd_addr_end(addr, end); | 157 | next = pgd_addr_end(addr, end); |
153 | if (pgd_none_or_clear_bad(pgd)) { | 158 | if (pgd_none_or_clear_bad(pgd)) { |
154 | if (walk->pte_hole) | 159 | if (ops->pte_hole) |
155 | err = walk->pte_hole(addr, next, walk); | 160 | err = ops->pte_hole(addr, next, walk); |
156 | if (err) | 161 | if (err) |
157 | break; | 162 | break; |
158 | continue; | 163 | continue; |
159 | } | 164 | } |
160 | if (walk->pmd_entry || walk->pte_entry) | 165 | if (ops->pmd_entry || ops->pte_entry) |
161 | err = walk_p4d_range(pgd, addr, next, walk); | 166 | err = walk_p4d_range(pgd, addr, next, walk); |
162 | if (err) | 167 | if (err) |
163 | break; | 168 | break; |
@@ -183,6 +188,7 @@ static int walk_hugetlb_range(unsigned long addr, unsigned long end, | |||
183 | unsigned long hmask = huge_page_mask(h); | 188 | unsigned long hmask = huge_page_mask(h); |
184 | unsigned long sz = huge_page_size(h); | 189 | unsigned long sz = huge_page_size(h); |
185 | pte_t *pte; | 190 | pte_t *pte; |
191 | const struct mm_walk_ops *ops = walk->ops; | ||
186 | int err = 0; | 192 | int err = 0; |
187 | 193 | ||
188 | do { | 194 | do { |
@@ -190,9 +196,9 @@ static int walk_hugetlb_range(unsigned long addr, unsigned long end, | |||
190 | pte = huge_pte_offset(walk->mm, addr & hmask, sz); | 196 | pte = huge_pte_offset(walk->mm, addr & hmask, sz); |
191 | 197 | ||
192 | if (pte) | 198 | if (pte) |
193 | err = walk->hugetlb_entry(pte, hmask, addr, next, walk); | 199 | err = ops->hugetlb_entry(pte, hmask, addr, next, walk); |
194 | else if (walk->pte_hole) | 200 | else if (ops->pte_hole) |
195 | err = walk->pte_hole(addr, next, walk); | 201 | err = ops->pte_hole(addr, next, walk); |
196 | 202 | ||
197 | if (err) | 203 | if (err) |
198 | break; | 204 | break; |
@@ -220,9 +226,10 @@ static int walk_page_test(unsigned long start, unsigned long end, | |||
220 | struct mm_walk *walk) | 226 | struct mm_walk *walk) |
221 | { | 227 | { |
222 | struct vm_area_struct *vma = walk->vma; | 228 | struct vm_area_struct *vma = walk->vma; |
229 | const struct mm_walk_ops *ops = walk->ops; | ||
223 | 230 | ||
224 | if (walk->test_walk) | 231 | if (ops->test_walk) |
225 | return walk->test_walk(start, end, walk); | 232 | return ops->test_walk(start, end, walk); |
226 | 233 | ||
227 | /* | 234 | /* |
228 | * vma(VM_PFNMAP) doesn't have any valid struct pages behind VM_PFNMAP | 235 | * vma(VM_PFNMAP) doesn't have any valid struct pages behind VM_PFNMAP |
@@ -234,8 +241,8 @@ static int walk_page_test(unsigned long start, unsigned long end, | |||
234 | */ | 241 | */ |
235 | if (vma->vm_flags & VM_PFNMAP) { | 242 | if (vma->vm_flags & VM_PFNMAP) { |
236 | int err = 1; | 243 | int err = 1; |
237 | if (walk->pte_hole) | 244 | if (ops->pte_hole) |
238 | err = walk->pte_hole(start, end, walk); | 245 | err = ops->pte_hole(start, end, walk); |
239 | return err ? err : 1; | 246 | return err ? err : 1; |
240 | } | 247 | } |
241 | return 0; | 248 | return 0; |
@@ -248,7 +255,7 @@ static int __walk_page_range(unsigned long start, unsigned long end, | |||
248 | struct vm_area_struct *vma = walk->vma; | 255 | struct vm_area_struct *vma = walk->vma; |
249 | 256 | ||
250 | if (vma && is_vm_hugetlb_page(vma)) { | 257 | if (vma && is_vm_hugetlb_page(vma)) { |
251 | if (walk->hugetlb_entry) | 258 | if (walk->ops->hugetlb_entry) |
252 | err = walk_hugetlb_range(start, end, walk); | 259 | err = walk_hugetlb_range(start, end, walk); |
253 | } else | 260 | } else |
254 | err = walk_pgd_range(start, end, walk); | 261 | err = walk_pgd_range(start, end, walk); |
@@ -258,11 +265,13 @@ static int __walk_page_range(unsigned long start, unsigned long end, | |||
258 | 265 | ||
259 | /** | 266 | /** |
260 | * walk_page_range - walk page table with caller specific callbacks | 267 | * walk_page_range - walk page table with caller specific callbacks |
261 | * @start: start address of the virtual address range | 268 | * @mm: mm_struct representing the target process of page table walk |
262 | * @end: end address of the virtual address range | 269 | * @start: start address of the virtual address range |
263 | * @walk: mm_walk structure defining the callbacks and the target address space | 270 | * @end: end address of the virtual address range |
271 | * @ops: operation to call during the walk | ||
272 | * @private: private data for callbacks' usage | ||
264 | * | 273 | * |
265 | * Recursively walk the page table tree of the process represented by @walk->mm | 274 | * Recursively walk the page table tree of the process represented by @mm |
266 | * within the virtual address range [@start, @end). During walking, we can do | 275 | * within the virtual address range [@start, @end). During walking, we can do |
267 | * some caller-specific works for each entry, by setting up pmd_entry(), | 276 | * some caller-specific works for each entry, by setting up pmd_entry(), |
268 | * pte_entry(), and/or hugetlb_entry(). If you don't set up for some of these | 277 | * pte_entry(), and/or hugetlb_entry(). If you don't set up for some of these |
@@ -278,47 +287,52 @@ static int __walk_page_range(unsigned long start, unsigned long end, | |||
278 | * | 287 | * |
279 | * Before starting to walk page table, some callers want to check whether | 288 | * Before starting to walk page table, some callers want to check whether |
280 | * they really want to walk over the current vma, typically by checking | 289 | * they really want to walk over the current vma, typically by checking |
281 | * its vm_flags. walk_page_test() and @walk->test_walk() are used for this | 290 | * its vm_flags. walk_page_test() and @ops->test_walk() are used for this |
282 | * purpose. | 291 | * purpose. |
283 | * | 292 | * |
284 | * struct mm_walk keeps current values of some common data like vma and pmd, | 293 | * struct mm_walk keeps current values of some common data like vma and pmd, |
285 | * which are useful for the access from callbacks. If you want to pass some | 294 | * which are useful for the access from callbacks. If you want to pass some |
286 | * caller-specific data to callbacks, @walk->private should be helpful. | 295 | * caller-specific data to callbacks, @private should be helpful. |
287 | * | 296 | * |
288 | * Locking: | 297 | * Locking: |
289 | * Callers of walk_page_range() and walk_page_vma() should hold | 298 | * Callers of walk_page_range() and walk_page_vma() should hold @mm->mmap_sem, |
290 | * @walk->mm->mmap_sem, because these function traverse vma list and/or | 299 | * because these function traverse vma list and/or access to vma's data. |
291 | * access to vma's data. | ||
292 | */ | 300 | */ |
293 | int walk_page_range(unsigned long start, unsigned long end, | 301 | int walk_page_range(struct mm_struct *mm, unsigned long start, |
294 | struct mm_walk *walk) | 302 | unsigned long end, const struct mm_walk_ops *ops, |
303 | void *private) | ||
295 | { | 304 | { |
296 | int err = 0; | 305 | int err = 0; |
297 | unsigned long next; | 306 | unsigned long next; |
298 | struct vm_area_struct *vma; | 307 | struct vm_area_struct *vma; |
308 | struct mm_walk walk = { | ||
309 | .ops = ops, | ||
310 | .mm = mm, | ||
311 | .private = private, | ||
312 | }; | ||
299 | 313 | ||
300 | if (start >= end) | 314 | if (start >= end) |
301 | return -EINVAL; | 315 | return -EINVAL; |
302 | 316 | ||
303 | if (!walk->mm) | 317 | if (!walk.mm) |
304 | return -EINVAL; | 318 | return -EINVAL; |
305 | 319 | ||
306 | VM_BUG_ON_MM(!rwsem_is_locked(&walk->mm->mmap_sem), walk->mm); | 320 | VM_BUG_ON_MM(!rwsem_is_locked(&walk.mm->mmap_sem), walk.mm); |
307 | 321 | ||
308 | vma = find_vma(walk->mm, start); | 322 | vma = find_vma(walk.mm, start); |
309 | do { | 323 | do { |
310 | if (!vma) { /* after the last vma */ | 324 | if (!vma) { /* after the last vma */ |
311 | walk->vma = NULL; | 325 | walk.vma = NULL; |
312 | next = end; | 326 | next = end; |
313 | } else if (start < vma->vm_start) { /* outside vma */ | 327 | } else if (start < vma->vm_start) { /* outside vma */ |
314 | walk->vma = NULL; | 328 | walk.vma = NULL; |
315 | next = min(end, vma->vm_start); | 329 | next = min(end, vma->vm_start); |
316 | } else { /* inside vma */ | 330 | } else { /* inside vma */ |
317 | walk->vma = vma; | 331 | walk.vma = vma; |
318 | next = min(end, vma->vm_end); | 332 | next = min(end, vma->vm_end); |
319 | vma = vma->vm_next; | 333 | vma = vma->vm_next; |
320 | 334 | ||
321 | err = walk_page_test(start, next, walk); | 335 | err = walk_page_test(start, next, &walk); |
322 | if (err > 0) { | 336 | if (err > 0) { |
323 | /* | 337 | /* |
324 | * positive return values are purely for | 338 | * positive return values are purely for |
@@ -331,28 +345,34 @@ int walk_page_range(unsigned long start, unsigned long end, | |||
331 | if (err < 0) | 345 | if (err < 0) |
332 | break; | 346 | break; |
333 | } | 347 | } |
334 | if (walk->vma || walk->pte_hole) | 348 | if (walk.vma || walk.ops->pte_hole) |
335 | err = __walk_page_range(start, next, walk); | 349 | err = __walk_page_range(start, next, &walk); |
336 | if (err) | 350 | if (err) |
337 | break; | 351 | break; |
338 | } while (start = next, start < end); | 352 | } while (start = next, start < end); |
339 | return err; | 353 | return err; |
340 | } | 354 | } |
341 | 355 | ||
342 | int walk_page_vma(struct vm_area_struct *vma, struct mm_walk *walk) | 356 | int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops, |
357 | void *private) | ||
343 | { | 358 | { |
359 | struct mm_walk walk = { | ||
360 | .ops = ops, | ||
361 | .mm = vma->vm_mm, | ||
362 | .vma = vma, | ||
363 | .private = private, | ||
364 | }; | ||
344 | int err; | 365 | int err; |
345 | 366 | ||
346 | if (!walk->mm) | 367 | if (!walk.mm) |
347 | return -EINVAL; | 368 | return -EINVAL; |
348 | 369 | ||
349 | VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem)); | 370 | VM_BUG_ON(!rwsem_is_locked(&vma->vm_mm->mmap_sem)); |
350 | VM_BUG_ON(!vma); | 371 | |
351 | walk->vma = vma; | 372 | err = walk_page_test(vma->vm_start, vma->vm_end, &walk); |
352 | err = walk_page_test(vma->vm_start, vma->vm_end, walk); | ||
353 | if (err > 0) | 373 | if (err > 0) |
354 | return 0; | 374 | return 0; |
355 | if (err < 0) | 375 | if (err < 0) |
356 | return err; | 376 | return err; |
357 | return __walk_page_range(vma->vm_start, vma->vm_end, walk); | 377 | return __walk_page_range(vma->vm_start, vma->vm_end, &walk); |
358 | } | 378 | } |
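As a closing illustration of the new interface (again an invented sketch, not taken from the patch: count_private, count_pmd_entry, count_walk_ops and count_pmds are hypothetical names), a caller holds mmap_sem as required by the kerneldoc above and threads per-walk state through the private pointer:

#include <linux/pagewalk.h>
#include <linux/rwsem.h>

struct count_private {
	unsigned long pmds;		/* populated PMD entries visited */
};

static int count_pmd_entry(pmd_t *pmd, unsigned long addr,
			   unsigned long next, struct mm_walk *walk)
{
	struct count_private *cp = walk->private;

	cp->pmds++;
	return 0;			/* non-zero would abort the walk */
}

static const struct mm_walk_ops count_walk_ops = {
	.pmd_entry	= count_pmd_entry,
};

static unsigned long count_pmds(struct mm_struct *mm, unsigned long start,
				unsigned long end)
{
	struct count_private cp = { 0 };

	down_read(&mm->mmap_sem);	/* walk_page_range() requires mmap_sem held */
	walk_page_range(mm, start, end, &count_walk_ops, &cp);
	up_read(&mm->mmap_sem);

	return cp.pmds;
}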