aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Hugh Dickins <hugh@veritas.com> 2005-10-29 21:16:31 -0400
committer Linus Torvalds <torvalds@g5.osdl.org> 2005-10-30 00:40:41 -0400
commit c0718806cf955d5eb51ea77bffb5b21d9bba4972 (patch)
tree bd29659bbff68604127439ec8144230a40772621
parent 67b02f119df50ffad5a4e9e53ea4c896535862cd (diff)
[PATCH] mm: rmap with inner ptlock
rmap's page_check_address now descends without page_table_lock. It first does just a pte_offset_map, in case there's no pte present worth locking for, then takes page_table_lock for the full check, and passes ptl back to the caller in the same style as pte_offset_map_lock. __xip_unmap, page_referenced_one and try_to_unmap_one now use pte_unmap_unlock, and so does try_to_unmap_cluster. page_check_address is reformatted to avoid progressive indentation. No use is made of its one error code, so it now returns NULL when it fails.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- include/linux/rmap.h 4
-rw-r--r-- mm/filemap_xip.c 12
-rw-r--r-- mm/rmap.c 109
3 files changed, 60 insertions, 65 deletions
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index e80fb7ee6efd..35b30e6c8cf8 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -95,8 +95,8 @@ int try_to_unmap(struct page *);
95/* 95/*
96 * Called from mm/filemap_xip.c to unmap empty zero page 96 * Called from mm/filemap_xip.c to unmap empty zero page
97 */ 97 */
98pte_t *page_check_address(struct page *, struct mm_struct *, unsigned long); 98pte_t *page_check_address(struct page *, struct mm_struct *,
99 99 unsigned long, spinlock_t **);
100 100
101/* 101/*
102 * Used by swapoff to help locate where page is expected in vma. 102 * Used by swapoff to help locate where page is expected in vma.
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 4e74ad60339a..9cf687e4a29a 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -174,6 +174,7 @@ __xip_unmap (struct address_space * mapping,
174 unsigned long address; 174 unsigned long address;
175 pte_t *pte; 175 pte_t *pte;
176 pte_t pteval; 176 pte_t pteval;
177 spinlock_t *ptl;
177 struct page *page; 178 struct page *page;
178 179
179 spin_lock(&mapping->i_mmap_lock); 180 spin_lock(&mapping->i_mmap_lock);
@@ -183,20 +184,15 @@ __xip_unmap (struct address_space * mapping,
183 ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); 184 ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
184 BUG_ON(address < vma->vm_start || address >= vma->vm_end); 185 BUG_ON(address < vma->vm_start || address >= vma->vm_end);
185 page = ZERO_PAGE(address); 186 page = ZERO_PAGE(address);
186 /* 187 pte = page_check_address(page, mm, address, &ptl);
187 * We need the page_table_lock to protect us from page faults, 188 if (pte) {
188 * munmap, fork, etc...
189 */
190 pte = page_check_address(page, mm, address);
191 if (!IS_ERR(pte)) {
192 /* Nuke the page table entry. */ 189 /* Nuke the page table entry. */
193 flush_cache_page(vma, address, pte_pfn(*pte)); 190 flush_cache_page(vma, address, pte_pfn(*pte));
194 pteval = ptep_clear_flush(vma, address, pte); 191 pteval = ptep_clear_flush(vma, address, pte);
195 page_remove_rmap(page); 192 page_remove_rmap(page);
196 dec_mm_counter(mm, file_rss); 193 dec_mm_counter(mm, file_rss);
197 BUG_ON(pte_dirty(pteval)); 194 BUG_ON(pte_dirty(pteval));
198 pte_unmap(pte); 195 pte_unmap_unlock(pte, ptl);
199 spin_unlock(&mm->page_table_lock);
200 page_cache_release(page); 196 page_cache_release(page);
201 } 197 }
202 } 198 }
diff --git a/mm/rmap.c b/mm/rmap.c
index 4c52c56c9905..a84bdfe582c0 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -247,34 +247,41 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
247 * On success returns with mapped pte and locked mm->page_table_lock. 247 * On success returns with mapped pte and locked mm->page_table_lock.
248 */ 248 */
249pte_t *page_check_address(struct page *page, struct mm_struct *mm, 249pte_t *page_check_address(struct page *page, struct mm_struct *mm,
250 unsigned long address) 250 unsigned long address, spinlock_t **ptlp)
251{ 251{
252 pgd_t *pgd; 252 pgd_t *pgd;
253 pud_t *pud; 253 pud_t *pud;
254 pmd_t *pmd; 254 pmd_t *pmd;
255 pte_t *pte; 255 pte_t *pte;
256 spinlock_t *ptl;
256 257
257 /*
258 * We need the page_table_lock to protect us from page faults,
259 * munmap, fork, etc...
260 */
261 spin_lock(&mm->page_table_lock);
262 pgd = pgd_offset(mm, address); 258 pgd = pgd_offset(mm, address);
263 if (likely(pgd_present(*pgd))) { 259 if (!pgd_present(*pgd))
264 pud = pud_offset(pgd, address); 260 return NULL;
265 if (likely(pud_present(*pud))) { 261
266 pmd = pmd_offset(pud, address); 262 pud = pud_offset(pgd, address);
267 if (likely(pmd_present(*pmd))) { 263 if (!pud_present(*pud))
268 pte = pte_offset_map(pmd, address); 264 return NULL;
269 if (likely(pte_present(*pte) && 265
270 page_to_pfn(page) == pte_pfn(*pte))) 266 pmd = pmd_offset(pud, address);
271 return pte; 267 if (!pmd_present(*pmd))
272 pte_unmap(pte); 268 return NULL;
273 } 269
274 } 270 pte = pte_offset_map(pmd, address);
271 /* Make a quick check before getting the lock */
272 if (!pte_present(*pte)) {
273 pte_unmap(pte);
274 return NULL;
275 } 275 }
276 spin_unlock(&mm->page_table_lock); 276
277 return ERR_PTR(-ENOENT); 277 ptl = &mm->page_table_lock;
278 spin_lock(ptl);
279 if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) {
280 *ptlp = ptl;
281 return pte;
282 }
283 pte_unmap_unlock(pte, ptl);
284 return NULL;
278} 285}
279 286
280/* 287/*
@@ -287,28 +294,28 @@ static int page_referenced_one(struct page *page,
287 struct mm_struct *mm = vma->vm_mm; 294 struct mm_struct *mm = vma->vm_mm;
288 unsigned long address; 295 unsigned long address;
289 pte_t *pte; 296 pte_t *pte;
297 spinlock_t *ptl;
290 int referenced = 0; 298 int referenced = 0;
291 299
292 address = vma_address(page, vma); 300 address = vma_address(page, vma);
293 if (address == -EFAULT) 301 if (address == -EFAULT)
294 goto out; 302 goto out;
295 303
296 pte = page_check_address(page, mm, address); 304 pte = page_check_address(page, mm, address, &ptl);
297 if (!IS_ERR(pte)) { 305 if (!pte)
298 if (ptep_clear_flush_young(vma, address, pte)) 306 goto out;
299 referenced++;
300 307
301 /* Pretend the page is referenced if the task has the 308 if (ptep_clear_flush_young(vma, address, pte))
302 swap token and is in the middle of a page fault. */ 309 referenced++;
303 if (mm != current->mm && !ignore_token &&
304 has_swap_token(mm) &&
305 rwsem_is_locked(&mm->mmap_sem))
306 referenced++;
307 310
308 (*mapcount)--; 311 /* Pretend the page is referenced if the task has the
309 pte_unmap(pte); 312 swap token and is in the middle of a page fault. */
310 spin_unlock(&mm->page_table_lock); 313 if (mm != current->mm && !ignore_token && has_swap_token(mm) &&
311 } 314 rwsem_is_locked(&mm->mmap_sem))
315 referenced++;
316
317 (*mapcount)--;
318 pte_unmap_unlock(pte, ptl);
312out: 319out:
313 return referenced; 320 return referenced;
314} 321}
@@ -507,14 +514,15 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
507 unsigned long address; 514 unsigned long address;
508 pte_t *pte; 515 pte_t *pte;
509 pte_t pteval; 516 pte_t pteval;
517 spinlock_t *ptl;
510 int ret = SWAP_AGAIN; 518 int ret = SWAP_AGAIN;
511 519
512 address = vma_address(page, vma); 520 address = vma_address(page, vma);
513 if (address == -EFAULT) 521 if (address == -EFAULT)
514 goto out; 522 goto out;
515 523
516 pte = page_check_address(page, mm, address); 524 pte = page_check_address(page, mm, address, &ptl);
517 if (IS_ERR(pte)) 525 if (!pte)
518 goto out; 526 goto out;
519 527
520 /* 528 /*
@@ -564,8 +572,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
564 page_cache_release(page); 572 page_cache_release(page);
565 573
566out_unmap: 574out_unmap:
567 pte_unmap(pte); 575 pte_unmap_unlock(pte, ptl);
568 spin_unlock(&mm->page_table_lock);
569out: 576out:
570 return ret; 577 return ret;
571} 578}
@@ -599,19 +606,14 @@ static void try_to_unmap_cluster(unsigned long cursor,
599 pgd_t *pgd; 606 pgd_t *pgd;
600 pud_t *pud; 607 pud_t *pud;
601 pmd_t *pmd; 608 pmd_t *pmd;
602 pte_t *pte, *original_pte; 609 pte_t *pte;
603 pte_t pteval; 610 pte_t pteval;
611 spinlock_t *ptl;
604 struct page *page; 612 struct page *page;
605 unsigned long address; 613 unsigned long address;
606 unsigned long end; 614 unsigned long end;
607 unsigned long pfn; 615 unsigned long pfn;
608 616
609 /*
610 * We need the page_table_lock to protect us from page faults,
611 * munmap, fork, etc...
612 */
613 spin_lock(&mm->page_table_lock);
614
615 address = (vma->vm_start + cursor) & CLUSTER_MASK; 617 address = (vma->vm_start + cursor) & CLUSTER_MASK;
616 end = address + CLUSTER_SIZE; 618 end = address + CLUSTER_SIZE;
617 if (address < vma->vm_start) 619 if (address < vma->vm_start)
@@ -621,22 +623,22 @@ static void try_to_unmap_cluster(unsigned long cursor,
621 623
622 pgd = pgd_offset(mm, address); 624 pgd = pgd_offset(mm, address);
623 if (!pgd_present(*pgd)) 625 if (!pgd_present(*pgd))
624 goto out_unlock; 626 return;
625 627
626 pud = pud_offset(pgd, address); 628 pud = pud_offset(pgd, address);
627 if (!pud_present(*pud)) 629 if (!pud_present(*pud))
628 goto out_unlock; 630 return;
629 631
630 pmd = pmd_offset(pud, address); 632 pmd = pmd_offset(pud, address);
631 if (!pmd_present(*pmd)) 633 if (!pmd_present(*pmd))
632 goto out_unlock; 634 return;
635
636 pte = pte_offset_map_lock(mm, pmd, address, &ptl);
633 637
634 /* Update high watermark before we lower rss */ 638 /* Update high watermark before we lower rss */
635 update_hiwater_rss(mm); 639 update_hiwater_rss(mm);
636 640
637 for (original_pte = pte = pte_offset_map(pmd, address); 641 for (; address < end; pte++, address += PAGE_SIZE) {
638 address < end; pte++, address += PAGE_SIZE) {
639
640 if (!pte_present(*pte)) 642 if (!pte_present(*pte))
641 continue; 643 continue;
642 644
@@ -669,10 +671,7 @@ static void try_to_unmap_cluster(unsigned long cursor,
669 dec_mm_counter(mm, file_rss); 671 dec_mm_counter(mm, file_rss);
670 (*mapcount)--; 672 (*mapcount)--;
671 } 673 }
672 674 pte_unmap_unlock(pte - 1, ptl);
673 pte_unmap(original_pte);
674out_unlock:
675 spin_unlock(&mm->page_table_lock);
676} 675}
677 676
678static int try_to_unmap_anon(struct page *page) 677static int try_to_unmap_anon(struct page *page)