Diffstat (limited to 'mm')
 -rw-r--r--  mm/memory.c   | 101
 -rw-r--r--  mm/mprotect.c |  47
 2 files changed, 4 insertions(+), 144 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index eba846bcf124..9898eeb9a21c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3606,103 +3606,6 @@ out:
 	return 0;
 }
 
-/* NUMA hinting page fault entry point for regular pmds */
-#ifdef CONFIG_NUMA_BALANCING
-static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
-			unsigned long addr, pmd_t *pmdp)
-{
-	pmd_t pmd;
-	pte_t *pte, *orig_pte;
-	unsigned long _addr = addr & PMD_MASK;
-	unsigned long offset;
-	spinlock_t *ptl;
-	bool numa = false;
-	int last_cpupid;
-
-	spin_lock(&mm->page_table_lock);
-	pmd = *pmdp;
-	if (pmd_numa(pmd)) {
-		set_pmd_at(mm, _addr, pmdp, pmd_mknonnuma(pmd));
-		numa = true;
-	}
-	spin_unlock(&mm->page_table_lock);
-
-	if (!numa)
-		return 0;
-
-	/* we're in a page fault so some vma must be in the range */
-	BUG_ON(!vma);
-	BUG_ON(vma->vm_start >= _addr + PMD_SIZE);
-	offset = max(_addr, vma->vm_start) & ~PMD_MASK;
-	VM_BUG_ON(offset >= PMD_SIZE);
-	orig_pte = pte = pte_offset_map_lock(mm, pmdp, _addr, &ptl);
-	pte += offset >> PAGE_SHIFT;
-	for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) {
-		pte_t pteval = *pte;
-		struct page *page;
-		int page_nid = -1;
-		int target_nid;
-		bool migrated = false;
-		int flags = 0;
-
-		if (!pte_present(pteval))
-			continue;
-		if (!pte_numa(pteval))
-			continue;
-		if (addr >= vma->vm_end) {
-			vma = find_vma(mm, addr);
-			/* there's a pte present so there must be a vma */
-			BUG_ON(!vma);
-			BUG_ON(addr < vma->vm_start);
-		}
-		if (pte_numa(pteval)) {
-			pteval = pte_mknonnuma(pteval);
-			set_pte_at(mm, addr, pte, pteval);
-		}
-		page = vm_normal_page(vma, addr, pteval);
-		if (unlikely(!page))
-			continue;
-
-		/*
-		 * Avoid grouping on DSO/COW pages in specific and RO pages
-		 * in general, RO pages shouldn't hurt as much anyway since
-		 * they can be in shared cache state.
-		 */
-		if (!pte_write(pteval))
-			flags |= TNF_NO_GROUP;
-
-		last_cpupid = page_cpupid_last(page);
-		page_nid = page_to_nid(page);
-		target_nid = numa_migrate_prep(page, vma, addr, page_nid);
-		pte_unmap_unlock(pte, ptl);
-		if (target_nid != -1) {
-			migrated = migrate_misplaced_page(page, vma, target_nid);
-			if (migrated) {
-				page_nid = target_nid;
-				flags |= TNF_MIGRATED;
-			}
-		} else {
-			put_page(page);
-		}
-
-		if (page_nid != -1)
-			task_numa_fault(last_cpupid, page_nid, 1, flags);
-
-		pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
-	}
-	pte_unmap_unlock(orig_pte, ptl);
-
-	return 0;
-}
-#else
-static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
-			unsigned long addr, pmd_t *pmdp)
-{
-	BUG();
-	return 0;
-}
-#endif /* CONFIG_NUMA_BALANCING */
-
 /*
  * These routines also need to handle stuff like marking pages dirty
  * and/or accessed for architectures that don't do it in hardware (most
@@ -3841,8 +3744,8 @@ retry:
 		}
 	}
 
-	if (pmd_numa(*pmd))
-		return do_pmd_numa_page(mm, vma, address, pmd);
+	/* THP should already have been handled */
+	BUG_ON(pmd_numa(*pmd));
 
 	/*
 	 * Use __pte_alloc instead of pte_alloc_map, because we can't
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 9a74855f1241..a0302ac0be98 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -37,15 +37,12 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 
 static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long addr, unsigned long end, pgprot_t newprot,
-		int dirty_accountable, int prot_numa, bool *ret_all_same_cpupid)
+		int dirty_accountable, int prot_numa)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pte_t *pte, oldpte;
 	spinlock_t *ptl;
 	unsigned long pages = 0;
-	bool all_same_cpupid = true;
-	int last_cpu = -1;
-	int last_pid = -1;
 
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	arch_enter_lazy_mmu_mode();
@@ -64,19 +61,6 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 
 				page = vm_normal_page(vma, addr, oldpte);
 				if (page) {
-					int cpupid = page_cpupid_last(page);
-					int this_cpu = cpupid_to_cpu(cpupid);
-					int this_pid = cpupid_to_pid(cpupid);
-
-					if (last_cpu == -1)
-						last_cpu = this_cpu;
-					if (last_pid == -1)
-						last_pid = this_pid;
-					if (last_cpu != this_cpu ||
-					    last_pid != this_pid) {
-						all_same_cpupid = false;
-					}
-
 					if (!pte_numa(oldpte)) {
 						ptent = pte_mknuma(ptent);
 						updated = true;
@@ -115,26 +99,9 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
 
-	*ret_all_same_cpupid = all_same_cpupid;
 	return pages;
 }
 
-#ifdef CONFIG_NUMA_BALANCING
-static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr,
-				       pmd_t *pmd)
-{
-	spin_lock(&mm->page_table_lock);
-	set_pmd_at(mm, addr & PMD_MASK, pmd, pmd_mknuma(*pmd));
-	spin_unlock(&mm->page_table_lock);
-}
-#else
-static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr,
-				       pmd_t *pmd)
-{
-	BUG();
-}
-#endif /* CONFIG_NUMA_BALANCING */
-
 static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 		pud_t *pud, unsigned long addr, unsigned long end,
 		pgprot_t newprot, int dirty_accountable, int prot_numa)
@@ -142,7 +109,6 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 	pmd_t *pmd;
 	unsigned long next;
 	unsigned long pages = 0;
-	bool all_same_cpupid;
 
 	pmd = pmd_offset(pud, addr);
 	do {
@@ -168,17 +134,8 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 		if (pmd_none_or_clear_bad(pmd))
 			continue;
 		this_pages = change_pte_range(vma, pmd, addr, next, newprot,
-				 dirty_accountable, prot_numa, &all_same_cpupid);
+				 dirty_accountable, prot_numa);
 		pages += this_pages;
-
-		/*
-		 * If we are changing protections for NUMA hinting faults then
-		 * set pmd_numa if the examined pages were all on the same
-		 * node. This allows a regular PMD to be handled as one fault
-		 * and effectively batches the taking of the PTL
-		 */
-		if (prot_numa && this_pages && all_same_cpupid)
-			change_pmd_protnuma(vma->vm_mm, addr, pmd);
 	} while (pmd++, addr = next, addr != end);
 
 	return pages;