author     Nick Piggin <npiggin@suse.de>                  2008-08-20 17:09:18 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org> 2008-08-20 18:40:32 -0400
commit     479db0bf408e65baa14d2a9821abfcbc0804b847 (patch)
tree       acdaaed567afefa36ac2fe27cfe22cfefeb50cd5
parent     2d70b68d42b5196a48ccb639e3797f097ef5bea3 (diff)
mm: dirty page tracking race fix
There is a race with dirty page accounting where a page may not properly be accounted for.

clear_page_dirty_for_io() calls page_mkclean(), then TestClearPageDirty().

page_mkclean() walks the rmaps for that page, and for each one it cleans and write-protects the pte if it was dirty. It uses page_check_address() to find the pte. That function has a shortcut that avoids taking the ptl if the pte is not present. Unfortunately, the pte can be switched to not-present and then back to present by other code while holding the page table lock -- this should not be a signal for page_mkclean() to ignore that pte, because it may be dirty. For example, powerpc64's set_pte_at() will clear a previously present pte before setting it to the desired value. There may also be other code in core mm or in an arch that does similar things.

The consequence of the bug is loss of data integrity due to msync, and loss of dirty page accounting accuracy. XIP's __xip_unmap() could easily also be unreliable (depending on the exact XIP locking scheme), which can lead to data corruption.

Fix this by adding an option to always take the ptl when checking the pte in page_check_address(). It remains possible to retain the lockless optimization for page_referenced() and try_to_unmap(), where missing a transiently not-present pte is harmless.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Jared Hulbert <jaredeh@gmail.com>
Cc: Carsten Otte <cotte@freenet.de>
Cc: Hugh Dickins <hugh@veritas.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
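[Editorial illustration, not part of the patch.] The race is easiest to see in a minimal userspace sketch: a pthread mutex stands in for the page table lock, an atomic word stands in for the pte, the "updater" thread mimics an arch set_pte_at() that transiently clears the entry under the lock, and the "checker" thread mimics page_check_address()'s lockless pte_present() shortcut. All names (updater, checker, PTE_PRESENT, PTE_DIRTY) are invented for the sketch.

/*
 * race.c -- build with: cc -pthread race.c
 * Demonstrates the lockless not-present shortcut skipping an entry
 * that is in fact present and dirty at all times from the locked view.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define PTE_PRESENT 0x1
#define PTE_DIRTY   0x2

static atomic_uint pte = PTE_PRESENT | PTE_DIRTY;   /* simulated pte */
static pthread_mutex_t ptl = PTHREAD_MUTEX_INITIALIZER; /* simulated ptl */
static int missed;

static void *updater(void *arg)
{
	for (int i = 0; i < 1000000; i++) {
		pthread_mutex_lock(&ptl);
		/* like powerpc64 set_pte_at(): clear, then write new value */
		atomic_store(&pte, 0);
		atomic_store(&pte, PTE_PRESENT | PTE_DIRTY);
		pthread_mutex_unlock(&ptl);
	}
	return NULL;
}

static void *checker(void *arg)
{
	for (int i = 0; i < 1000000; i++) {
		/* racy shortcut: peek without taking the lock */
		if (!(atomic_load(&pte) & PTE_PRESENT)) {
			missed++;	/* a dirty entry was wrongly skipped */
			continue;
		}
		pthread_mutex_lock(&ptl);
		if (atomic_load(&pte) & PTE_DIRTY) {
			/* would clean and write-protect here */
		}
		pthread_mutex_unlock(&ptl);
	}
	return NULL;
}

int main(void)
{
	pthread_t u, c;
	pthread_create(&u, NULL, updater, NULL);
	pthread_create(&c, NULL, checker, NULL);
	pthread_join(u, NULL);
	pthread_join(c, NULL);
	printf("dirty entries skipped by the racy check: %d\n", missed);
	return 0;
}

On a multicore machine the skipped count is typically non-zero; that window is exactly what this patch closes for page_mkclean().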
-rw-r--r--  include/linux/rmap.h   2
-rw-r--r--  mm/filemap_xip.c       2
-rw-r--r--  mm/rmap.c             14
3 files changed, 11 insertions(+), 7 deletions(-)
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 69407f85e10b..fed6f5e0b411 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -102,7 +102,7 @@ int try_to_unmap(struct page *, int ignore_refs);
  * Called from mm/filemap_xip.c to unmap empty zero page
  */
 pte_t *page_check_address(struct page *, struct mm_struct *,
-			  unsigned long, spinlock_t **);
+			  unsigned long, spinlock_t **, int);
 
 /*
  * Used by swapoff to help locate where page is expected in vma.
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 380ab402d711..8b710ca13247 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -185,7 +185,7 @@ __xip_unmap (struct address_space * mapping,
 		address = vma->vm_start +
 			((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
 		BUG_ON(address < vma->vm_start || address >= vma->vm_end);
-		pte = page_check_address(page, mm, address, &ptl);
+		pte = page_check_address(page, mm, address, &ptl, 1);
 		if (pte) {
 			/* Nuke the page table entry. */
 			flush_cache_page(vma, address, pte_pfn(*pte));
diff --git a/mm/rmap.c b/mm/rmap.c
index 059774712c08..0383acfcb068 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -224,10 +224,14 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
 /*
  * Check that @page is mapped at @address into @mm.
  *
+ * If @sync is false, page_check_address may perform a racy check to avoid
+ * the page table lock when the pte is not present (helpful when reclaiming
+ * highly shared pages).
+ *
  * On success returns with pte mapped and locked.
  */
 pte_t *page_check_address(struct page *page, struct mm_struct *mm,
-			  unsigned long address, spinlock_t **ptlp)
+			  unsigned long address, spinlock_t **ptlp, int sync)
 {
 	pgd_t *pgd;
 	pud_t *pud;
@@ -249,7 +253,7 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm,
 
 	pte = pte_offset_map(pmd, address);
 	/* Make a quick check before getting the lock */
-	if (!pte_present(*pte)) {
+	if (!sync && !pte_present(*pte)) {
 		pte_unmap(pte);
 		return NULL;
 	}
@@ -281,7 +285,7 @@ static int page_referenced_one(struct page *page,
 	if (address == -EFAULT)
 		goto out;
 
-	pte = page_check_address(page, mm, address, &ptl);
+	pte = page_check_address(page, mm, address, &ptl, 0);
 	if (!pte)
 		goto out;
 
@@ -450,7 +454,7 @@ static int page_mkclean_one(struct page *page, struct vm_area_struct *vma)
 	if (address == -EFAULT)
 		goto out;
 
-	pte = page_check_address(page, mm, address, &ptl);
+	pte = page_check_address(page, mm, address, &ptl, 1);
 	if (!pte)
 		goto out;
 
@@ -704,7 +708,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 	if (address == -EFAULT)
 		goto out;
 
-	pte = page_check_address(page, mm, address, &ptl);
+	pte = page_check_address(page, mm, address, &ptl, 0);
 	if (!pte)
 		goto out;
 
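[Editorial illustration, not part of the patch.] In the same userspace terms as the sketch above, the fix boils down to letting correctness-critical callers bypass the lockless peek. The sync parameter below mirrors the one added to page_check_address(); in the hunks above, page_mkclean_one() and __xip_unmap() pass 1 (a dirty pte must never be missed), while page_referenced_one() and try_to_unmap_one() keep the 0 fast path, where skipping a transiently not-present pte is harmless. check_entry() is an invented name; pte and ptl refer to the earlier sketch.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

#define PTE_PRESENT 0x1

extern atomic_uint pte;		/* simulated pte from the sketch above */
extern pthread_mutex_t ptl;	/* simulated page table lock */

/*
 * Mirror of the patched page_check_address() logic: with sync == 0 the
 * racy not-present shortcut is kept; with sync == 1 the lock is always
 * taken, so a transiently cleared entry can no longer be skipped.
 */
bool check_entry(int sync, pthread_mutex_t **ptlp)
{
	/* Make a quick check before getting the lock */
	if (!sync && !(atomic_load(&pte) & PTE_PRESENT))
		return false;	/* fast path: may race, callers opt in */

	pthread_mutex_lock(&ptl);
	if (!(atomic_load(&pte) & PTE_PRESENT)) {
		pthread_mutex_unlock(&ptl);
		return false;	/* really not present */
	}
	*ptlp = &ptl;		/* like the kernel, return with the lock held */
	return true;
}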