author		Linus Torvalds <torvalds@linux-foundation.org>	2018-10-12 18:22:59 -0400
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2018-10-18 05:30:52 -0400
commit		eb66ae030829605d61fbef1909ce310e29f78821 (patch)
tree		8807b57c5bfd28b6c9b689a124e0c37257ba002f
parent		19e6420e4170acce7a8651dfb87195dff5adbe72 (diff)
mremap: properly flush TLB before releasing the page
Jann Horn points out that our TLB flushing was subtly wrong for the mremap() case. What makes mremap() special is that we don't follow the usual "add page to list of pages to be freed, then flush tlb, and then free pages". No, mremap() obviously just _moves_ the page from one page table location to another.

That matters, because mremap() thus doesn't directly control the lifetime of the moved page with a freelist: instead, the lifetime of the page is controlled by the page table locking, that serializes access to the entry.

As a result, we need to flush the TLB not just before releasing the lock for the source location (to avoid any concurrent accesses to the entry), but also before we release the destination page table lock (to avoid the TLB being flushed after somebody else has already done something to that page).

This also makes the whole "need_flush" logic unnecessary, since we now always end up flushing the TLB for every valid entry.

Reported-and-tested-by: Jann Horn <jannh@google.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Tested-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r--	include/linux/huge_mm.h	2
-rw-r--r--	mm/huge_memory.c	10
-rw-r--r--	mm/mremap.c	30
3 files changed, 18 insertions, 24 deletions
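The fix below boils down to a single ordering rule: the TLB flush for the old mapping must happen while both the source and destination page table locks are still held. As a rough orientation before the full diff, here is a condensed sketch of the patched move_ptes() path in mm/mremap.c; setup, lazy-MMU handling and the rmap-lock bookkeeping are omitted for brevity, so read it as an illustration rather than the complete function:

	/* Inside move_ptes(), with old_ptl held and new_ptl nested under it: */
	for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
				   new_pte++, new_addr += PAGE_SIZE) {
		if (pte_none(*old_pte))
			continue;

		pte = ptep_get_and_clear(mm, old_addr, old_pte);
		/* Any valid entry may still be cached in some CPU's TLB. */
		if (pte_present(pte))
			force_flush = true;
		pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
		pte = move_soft_dirty_pte(pte);
		set_pte_at(mm, new_addr, new_pte, pte);
	}

	/*
	 * Flush before dropping either page table lock, so nobody can
	 * act on the moved page while a stale TLB entry still exists.
	 */
	if (force_flush)
		flush_tlb_range(vma, old_end - len, old_end);
	if (new_ptl != old_ptl)
		spin_unlock(new_ptl);
	pte_unmap(new_pte - 1);
	pte_unmap_unlock(old_pte - 1, old_ptl);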
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 99c19b06d9a4..fdcb45999b26 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -43,7 +43,7 @@ extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			unsigned char *vec);
 extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 			 unsigned long new_addr, unsigned long old_end,
-			 pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush);
+			 pmd_t *old_pmd, pmd_t *new_pmd);
 extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			unsigned long addr, pgprot_t newprot,
 			int prot_numa);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 58269f8ba7c4..deed97fba979 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1780,7 +1780,7 @@ static pmd_t move_soft_dirty_pmd(pmd_t pmd)
 
 bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 		  unsigned long new_addr, unsigned long old_end,
-		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
+		  pmd_t *old_pmd, pmd_t *new_pmd)
 {
 	spinlock_t *old_ptl, *new_ptl;
 	pmd_t pmd;
@@ -1811,7 +1811,7 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 		if (new_ptl != old_ptl)
 			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
 		pmd = pmdp_huge_get_and_clear(mm, old_addr, old_pmd);
-		if (pmd_present(pmd) && pmd_dirty(pmd))
+		if (pmd_present(pmd))
 			force_flush = true;
 		VM_BUG_ON(!pmd_none(*new_pmd));
 
@@ -1822,12 +1822,10 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 		}
 		pmd = move_soft_dirty_pmd(pmd);
 		set_pmd_at(mm, new_addr, new_pmd, pmd);
-		if (new_ptl != old_ptl)
-			spin_unlock(new_ptl);
 		if (force_flush)
 			flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
-		else
-			*need_flush = true;
+		if (new_ptl != old_ptl)
+			spin_unlock(new_ptl);
 		spin_unlock(old_ptl);
 		return true;
 	}
diff --git a/mm/mremap.c b/mm/mremap.c
index 5c2e18505f75..a9617e72e6b7 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -115,7 +115,7 @@ static pte_t move_soft_dirty_pte(pte_t pte)
 static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		unsigned long old_addr, unsigned long old_end,
 		struct vm_area_struct *new_vma, pmd_t *new_pmd,
-		unsigned long new_addr, bool need_rmap_locks, bool *need_flush)
+		unsigned long new_addr, bool need_rmap_locks)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pte_t *old_pte, *new_pte, pte;
@@ -163,15 +163,17 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 
 		pte = ptep_get_and_clear(mm, old_addr, old_pte);
 		/*
-		 * If we are remapping a dirty PTE, make sure
+		 * If we are remapping a valid PTE, make sure
 		 * to flush TLB before we drop the PTL for the
-		 * old PTE or we may race with page_mkclean().
+		 * PTE.
 		 *
-		 * This check has to be done after we removed the
-		 * old PTE from page tables or another thread may
-		 * dirty it after the check and before the removal.
+		 * NOTE! Both old and new PTL matter: the old one
+		 * for racing with page_mkclean(), the new one to
+		 * make sure the physical page stays valid until
+		 * the TLB entry for the old mapping has been
+		 * flushed.
 		 */
-		if (pte_present(pte) && pte_dirty(pte))
+		if (pte_present(pte))
 			force_flush = true;
 		pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
 		pte = move_soft_dirty_pte(pte);
@@ -179,13 +181,11 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 	}
 
 	arch_leave_lazy_mmu_mode();
+	if (force_flush)
+		flush_tlb_range(vma, old_end - len, old_end);
 	if (new_ptl != old_ptl)
 		spin_unlock(new_ptl);
 	pte_unmap(new_pte - 1);
-	if (force_flush)
-		flush_tlb_range(vma, old_end - len, old_end);
-	else
-		*need_flush = true;
 	pte_unmap_unlock(old_pte - 1, old_ptl);
 	if (need_rmap_locks)
 		drop_rmap_locks(vma);
@@ -198,7 +198,6 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 {
 	unsigned long extent, next, old_end;
 	pmd_t *old_pmd, *new_pmd;
-	bool need_flush = false;
 	unsigned long mmun_start;	/* For mmu_notifiers */
 	unsigned long mmun_end;	/* For mmu_notifiers */
 
@@ -229,8 +228,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 		if (need_rmap_locks)
 			take_rmap_locks(vma);
 		moved = move_huge_pmd(vma, old_addr, new_addr,
-				    old_end, old_pmd, new_pmd,
-				    &need_flush);
+				    old_end, old_pmd, new_pmd);
 		if (need_rmap_locks)
 			drop_rmap_locks(vma);
 		if (moved)
@@ -246,10 +244,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 		if (extent > next - new_addr)
 			extent = next - new_addr;
 		move_ptes(vma, old_pmd, old_addr, old_addr + extent, new_vma,
-			  new_pmd, new_addr, need_rmap_locks);
+			  new_pmd, new_addr, need_rmap_locks);
 	}
-	if (need_flush)
-		flush_tlb_range(vma, old_end-len, old_addr);
 
 	mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
 