Diffstat (limited to 'mm')
-rw-r--r--  mm/memory.c   | 101
-rw-r--r--  mm/mprotect.c |  47
2 files changed, 4 insertions(+), 144 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index eba846bcf124..9898eeb9a21c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3606,103 +3606,6 @@ out:
 	return 0;
 }
 
-/* NUMA hinting page fault entry point for regular pmds */
-#ifdef CONFIG_NUMA_BALANCING
-static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
-		     unsigned long addr, pmd_t *pmdp)
-{
-	pmd_t pmd;
-	pte_t *pte, *orig_pte;
-	unsigned long _addr = addr & PMD_MASK;
-	unsigned long offset;
-	spinlock_t *ptl;
-	bool numa = false;
-	int last_cpupid;
-
-	spin_lock(&mm->page_table_lock);
-	pmd = *pmdp;
-	if (pmd_numa(pmd)) {
-		set_pmd_at(mm, _addr, pmdp, pmd_mknonnuma(pmd));
-		numa = true;
-	}
-	spin_unlock(&mm->page_table_lock);
-
-	if (!numa)
-		return 0;
-
-	/* we're in a page fault so some vma must be in the range */
-	BUG_ON(!vma);
-	BUG_ON(vma->vm_start >= _addr + PMD_SIZE);
-	offset = max(_addr, vma->vm_start) & ~PMD_MASK;
-	VM_BUG_ON(offset >= PMD_SIZE);
-	orig_pte = pte = pte_offset_map_lock(mm, pmdp, _addr, &ptl);
-	pte += offset >> PAGE_SHIFT;
-	for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) {
-		pte_t pteval = *pte;
-		struct page *page;
-		int page_nid = -1;
-		int target_nid;
-		bool migrated = false;
-		int flags = 0;
-
-		if (!pte_present(pteval))
-			continue;
-		if (!pte_numa(pteval))
-			continue;
-		if (addr >= vma->vm_end) {
-			vma = find_vma(mm, addr);
-			/* there's a pte present so there must be a vma */
-			BUG_ON(!vma);
-			BUG_ON(addr < vma->vm_start);
-		}
-		if (pte_numa(pteval)) {
-			pteval = pte_mknonnuma(pteval);
-			set_pte_at(mm, addr, pte, pteval);
-		}
-		page = vm_normal_page(vma, addr, pteval);
-		if (unlikely(!page))
-			continue;
-
-		/*
-		 * Avoid grouping on DSO/COW pages in specific and RO pages
-		 * in general, RO pages shouldn't hurt as much anyway since
-		 * they can be in shared cache state.
-		 */
-		if (!pte_write(pteval))
-			flags |= TNF_NO_GROUP;
-
-		last_cpupid = page_cpupid_last(page);
-		page_nid = page_to_nid(page);
-		target_nid = numa_migrate_prep(page, vma, addr, page_nid);
-		pte_unmap_unlock(pte, ptl);
-		if (target_nid != -1) {
-			migrated = migrate_misplaced_page(page, vma, target_nid);
-			if (migrated) {
-				page_nid = target_nid;
-				flags |= TNF_MIGRATED;
-			}
-		} else {
-			put_page(page);
-		}
-
-		if (page_nid != -1)
-			task_numa_fault(last_cpupid, page_nid, 1, flags);
-
-		pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
-	}
-	pte_unmap_unlock(orig_pte, ptl);
-
-	return 0;
-}
-#else
-static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
-		     unsigned long addr, pmd_t *pmdp)
-{
-	BUG();
-	return 0;
-}
-#endif /* CONFIG_NUMA_BALANCING */
-
 /*
  * These routines also need to handle stuff like marking pages dirty
  * and/or accessed for architectures that don't do it in hardware (most
@@ -3841,8 +3744,8 @@ retry:
 		}
 	}
 
-	if (pmd_numa(*pmd))
-		return do_pmd_numa_page(mm, vma, address, pmd);
+	/* THP should already have been handled */
+	BUG_ON(pmd_numa(*pmd));
 
 	/*
 	 * Use __pte_alloc instead of pte_alloc_map, because we can't
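
With do_pmd_numa_page() removed and the call site reduced to the BUG_ON(pmd_numa(*pmd)) above, a NUMA hinting fault against a regular pmd is no longer handled as a batch; each faulting pte is resolved on its own. The fragment below is a minimal sketch of that per-pte handling, assembled only from helpers visible in the deleted code above (pte_mknonnuma(), vm_normal_page(), numa_migrate_prep(), migrate_misplaced_page(), task_numa_fault()); the function name numa_hinting_fault_one_pte() is made up for illustration and this is not the kernel's actual do_numa_page() implementation.

/*
 * Illustrative sketch only: resolve a NUMA hinting fault for a single
 * pte, performing the same steps the removed pmd-level batch loop
 * performed for each entry.
 */
static int numa_hinting_fault_one_pte(struct mm_struct *mm,
		struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp)
{
	spinlock_t *ptl;
	pte_t *pte, pteval;
	struct page *page;
	int page_nid, target_nid, last_cpupid;
	int flags = 0;

	pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
	pteval = *pte;
	if (!pte_present(pteval) || !pte_numa(pteval)) {
		/* Raced with another fault that already cleared the NUMA bit. */
		pte_unmap_unlock(pte, ptl);
		return 0;
	}

	/* Clear the NUMA bit so the faulting access can be restarted. */
	pteval = pte_mknonnuma(pteval);
	set_pte_at(mm, addr, pte, pteval);

	page = vm_normal_page(vma, addr, pteval);
	if (!page) {
		pte_unmap_unlock(pte, ptl);
		return 0;
	}

	/* RO pages are not used for grouping (see the deleted comment above). */
	if (!pte_write(pteval))
		flags |= TNF_NO_GROUP;

	last_cpupid = page_cpupid_last(page);
	page_nid = page_to_nid(page);
	target_nid = numa_migrate_prep(page, vma, addr, page_nid);
	pte_unmap_unlock(pte, ptl);

	if (target_nid != -1) {
		/* migrate_misplaced_page() releases the page reference itself. */
		if (migrate_misplaced_page(page, vma, target_nid)) {
			page_nid = target_nid;
			flags |= TNF_MIGRATED;
		}
	} else {
		put_page(page);
	}

	/* Record the hinting fault with the NUMA balancing scheduler code. */
	task_numa_fault(last_cpupid, page_nid, 1, flags);
	return 0;
}
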
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 9a74855f1241..a0302ac0be98 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -37,15 +37,12 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 
 static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long addr, unsigned long end, pgprot_t newprot,
-		int dirty_accountable, int prot_numa, bool *ret_all_same_cpupid)
+		int dirty_accountable, int prot_numa)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pte_t *pte, oldpte;
 	spinlock_t *ptl;
 	unsigned long pages = 0;
-	bool all_same_cpupid = true;
-	int last_cpu = -1;
-	int last_pid = -1;
 
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	arch_enter_lazy_mmu_mode();
@@ -64,19 +61,6 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 
 				page = vm_normal_page(vma, addr, oldpte);
 				if (page) {
-					int cpupid = page_cpupid_last(page);
-					int this_cpu = cpupid_to_cpu(cpupid);
-					int this_pid = cpupid_to_pid(cpupid);
-
-					if (last_cpu == -1)
-						last_cpu = this_cpu;
-					if (last_pid == -1)
-						last_pid = this_pid;
-					if (last_cpu != this_cpu ||
-					    last_pid != this_pid) {
-						all_same_cpupid = false;
-					}
-
 					if (!pte_numa(oldpte)) {
 						ptent = pte_mknuma(ptent);
 						updated = true;
@@ -115,26 +99,9 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
 
-	*ret_all_same_cpupid = all_same_cpupid;
 	return pages;
 }
 
-#ifdef CONFIG_NUMA_BALANCING
-static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr,
-				       pmd_t *pmd)
-{
-	spin_lock(&mm->page_table_lock);
-	set_pmd_at(mm, addr & PMD_MASK, pmd, pmd_mknuma(*pmd));
-	spin_unlock(&mm->page_table_lock);
-}
-#else
-static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr,
-				       pmd_t *pmd)
-{
-	BUG();
-}
-#endif /* CONFIG_NUMA_BALANCING */
-
 static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 		pud_t *pud, unsigned long addr, unsigned long end,
 		pgprot_t newprot, int dirty_accountable, int prot_numa)
@@ -142,7 +109,6 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 	pmd_t *pmd;
 	unsigned long next;
 	unsigned long pages = 0;
-	bool all_same_cpupid;
 
 	pmd = pmd_offset(pud, addr);
 	do {
@@ -168,17 +134,8 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 		if (pmd_none_or_clear_bad(pmd))
 			continue;
 		this_pages = change_pte_range(vma, pmd, addr, next, newprot,
-				 dirty_accountable, prot_numa, &all_same_cpupid);
+				 dirty_accountable, prot_numa);
 		pages += this_pages;
-
-		/*
-		 * If we are changing protections for NUMA hinting faults then
-		 * set pmd_numa if the examined pages were all on the same
-		 * node. This allows a regular PMD to be handled as one fault
-		 * and effectively batches the taking of the PTL
-		 */
-		if (prot_numa && this_pages && all_same_cpupid)
-			change_pmd_protnuma(vma->vm_mm, addr, pmd);
 	} while (pmd++, addr = next, addr != end);
 
 	return pages;
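
On the mprotect side, the deleted comment above described the batching rationale: when every examined page in the range appeared to share a cpupid, the whole pmd was marked via change_pmd_protnuma() so one fault could cover the range. After this change, change_pte_range() only ever marks individual ptes and the pmd itself is never made NUMA, matching the BUG_ON(pmd_numa(*pmd)) added in mm/memory.c. The sketch below is an approximation of what the surviving prot_numa path amounts to, assembled from the context lines above; it is not the verbatim post-patch source, and the helper name mark_pte_for_numa_hinting() is made up for illustration.

/*
 * Approximation of the prot_numa marking that remains after this patch:
 * each present pte backed by a normal page is made "NUMA" individually,
 * so the fault path only ever sees pte_numa() faults on regular pmds.
 */
static void mark_pte_for_numa_hinting(struct vm_area_struct *vma,
		unsigned long addr, pte_t *pte, unsigned long *pages)
{
	pte_t oldpte = *pte;
	struct page *page;

	if (!pte_present(oldpte))
		return;

	page = vm_normal_page(vma, addr, oldpte);
	if (!page)
		return;

	if (!pte_numa(oldpte)) {
		/*
		 * pte_mknuma() makes the next hardware access trap into the
		 * NUMA hinting fault path for this single pte.
		 */
		set_pte_at(vma->vm_mm, addr, pte, pte_mknuma(oldpte));
		(*pages)++;
	}
}
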