path: root/mm/mprotect.c
author    Mel Gorman <mgorman@suse.de>    2013-10-07 06:29:25 -0400
committer Ingo Molnar <mingo@kernel.org>    2013-10-09 08:47:55 -0400
commit    0f19c17929c952c6f0966d93ab05558e7bf814cc (patch)
tree      a881a5c520d8d0791dd73859f51c87285d3a06be    /mm/mprotect.c
parent    6688cc05473b36a0a3d3971e1adf1712919b32eb (diff)
mm: numa: Do not batch handle PMD pages
With the THP migration races closed it is still possible to occasionally see corruption. The problem is related to handling PMD pages in batch. When a page fault is handled it can be assumed that the page being faulted will also be flushed from the TLB. The same flushing does not happen when handling PMD pages in batch. Fixing this is straightforward, but there are a number of reasons not to:

1. Multiple TLB flushes may have to be sent depending on what pages get migrated.

2. Handling PMDs in batch means that faults get accounted to the task that is handling the fault. While care is taken to only mark PMDs where the last CPU and PID match, it can still have problems due to PID truncation when matching PIDs.

3. Batching on the PMD level may reduce faults, but setting pmd_numa requires taking a heavy lock that can contend with THP migration, and handling the fault requires releasing and reacquiring the PTL for every page migrated. It's still pretty heavy.

PMD batch handling is not something that people have ever been happy with. This patch removes it; later patches will deal with the additional fault overhead using more intelligent migrate rate adaptation.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-48-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
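The PID-truncation problem mentioned in point 2 is easiest to see with a small standalone sketch. The per-page cpupid value only keeps the low bits of the PID, so two different tasks can compare as "the same" once truncated. The bit width and helper below are illustrative assumptions, not the kernel's actual layout:

#include <stdio.h>

/* Assumed field width for illustration only; the kernel's real layout differs. */
#define LAST_PID_BITS	8
#define LAST_PID_MASK	((1 << LAST_PID_BITS) - 1)

/* Model of how a PID is truncated when packed into the per-page cpupid value. */
static int stored_pid(int pid)
{
	return pid & LAST_PID_MASK;
}

int main(void)
{
	int pid_a = 1000;
	int pid_b = 1256;	/* differs from pid_a by 1 << LAST_PID_BITS */

	/* Both PIDs truncate to 232, so a "same task" comparison passes falsely. */
	printf("pid %d stored as %d, pid %d stored as %d, match: %s\n",
	       pid_a, stored_pid(pid_a), pid_b, stored_pid(pid_b),
	       stored_pid(pid_a) == stored_pid(pid_b) ? "yes" : "no");
	return 0;
}

With enough tasks on a machine such collisions become likely, which is why the last CPU/PID check in the removed batching code could still mark a PMD on behalf of the wrong task.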
Diffstat (limited to 'mm/mprotect.c')
-rw-r--r--  mm/mprotect.c | 47 +--
1 file changed, 2 insertions(+), 45 deletions(-)
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 9a74855f1241..a0302ac0be98 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -37,15 +37,12 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 
 static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long addr, unsigned long end, pgprot_t newprot,
-		int dirty_accountable, int prot_numa, bool *ret_all_same_cpupid)
+		int dirty_accountable, int prot_numa)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pte_t *pte, oldpte;
 	spinlock_t *ptl;
 	unsigned long pages = 0;
-	bool all_same_cpupid = true;
-	int last_cpu = -1;
-	int last_pid = -1;
 
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	arch_enter_lazy_mmu_mode();
@@ -64,19 +61,6 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 
 			page = vm_normal_page(vma, addr, oldpte);
 			if (page) {
-				int cpupid = page_cpupid_last(page);
-				int this_cpu = cpupid_to_cpu(cpupid);
-				int this_pid = cpupid_to_pid(cpupid);
-
-				if (last_cpu == -1)
-					last_cpu = this_cpu;
-				if (last_pid == -1)
-					last_pid = this_pid;
-				if (last_cpu != this_cpu ||
-				    last_pid != this_pid) {
-					all_same_cpupid = false;
-				}
-
 				if (!pte_numa(oldpte)) {
 					ptent = pte_mknuma(ptent);
 					updated = true;
@@ -115,26 +99,9 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
 
-	*ret_all_same_cpupid = all_same_cpupid;
 	return pages;
 }
 
-#ifdef CONFIG_NUMA_BALANCING
-static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr,
-				       pmd_t *pmd)
-{
-	spin_lock(&mm->page_table_lock);
-	set_pmd_at(mm, addr & PMD_MASK, pmd, pmd_mknuma(*pmd));
-	spin_unlock(&mm->page_table_lock);
-}
-#else
-static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr,
-				       pmd_t *pmd)
-{
-	BUG();
-}
-#endif /* CONFIG_NUMA_BALANCING */
-
 static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 		pud_t *pud, unsigned long addr, unsigned long end,
 		pgprot_t newprot, int dirty_accountable, int prot_numa)
@@ -142,7 +109,6 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 	pmd_t *pmd;
 	unsigned long next;
 	unsigned long pages = 0;
-	bool all_same_cpupid;
 
 	pmd = pmd_offset(pud, addr);
 	do {
@@ -168,17 +134,8 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 		if (pmd_none_or_clear_bad(pmd))
 			continue;
 		this_pages = change_pte_range(vma, pmd, addr, next, newprot,
-				 dirty_accountable, prot_numa, &all_same_cpupid);
+				 dirty_accountable, prot_numa);
 		pages += this_pages;
-
-		/*
-		 * If we are changing protections for NUMA hinting faults then
-		 * set pmd_numa if the examined pages were all on the same
-		 * node. This allows a regular PMD to be handled as one fault
-		 * and effectively batches the taking of the PTL
-		 */
-		if (prot_numa && this_pages && all_same_cpupid)
-			change_pmd_protnuma(vma->vm_mm, addr, pmd);
 	} while (pmd++, addr = next, addr != end);
 
 	return pages;