author		Hugh Dickins <hugh@veritas.com>		2005-10-29 21:16:31 -0400
committer	Linus Torvalds <torvalds@g5.osdl.org>	2005-10-30 00:40:41 -0400
commit		c0718806cf955d5eb51ea77bffb5b21d9bba4972 (patch)
tree		bd29659bbff68604127439ec8144230a40772621 /mm/rmap.c
parent		67b02f119df50ffad5a4e9e53ea4c896535862cd (diff)
[PATCH] mm: rmap with inner ptlock
rmap's page_check_address now descends without page_table_lock: first just
pte_offset_map in case there's no pte present worth locking for, then take
page_table_lock for the full check, and pass the ptl back to the caller in
the same style as pte_offset_map_lock. __xip_unmap, page_referenced_one and
try_to_unmap_one use pte_unmap_unlock; so does try_to_unmap_cluster.

page_check_address has been reformatted to avoid progressive indentation;
and since no use was made of its one error code, it now returns NULL when
it fails.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'mm/rmap.c')
-rw-r--r--	mm/rmap.c	109
1 file changed, 54 insertions(+), 55 deletions(-)
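Before the diff itself, a minimal sketch of the new calling convention may help. The wrapper function below is hypothetical; page_check_address, pte_unmap_unlock and the spinlock_t **ptlp parameter are the ones this patch introduces:

/*
 * Hypothetical caller illustrating the contract this patch establishes
 * for page_check_address(): on success the returned pte is mapped and
 * its page table lock is held, with the lock handed back through *ptlp
 * for the caller to release.
 */
static int touch_page_pte(struct page *page, struct mm_struct *mm,
			  unsigned long address)
{
	spinlock_t *ptl;
	pte_t *pte;

	pte = page_check_address(page, mm, address, &ptl);
	if (!pte)
		return 0;		/* not mapped here; no lock taken */

	/* ... inspect or modify *pte, safely under ptl ... */

	pte_unmap_unlock(pte, ptl);	/* drops the kmap and ptl together */
	return 1;
}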
diff --git a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -247,34 +247,41 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
  * On success returns with mapped pte and locked mm->page_table_lock.
  */
 pte_t *page_check_address(struct page *page, struct mm_struct *mm,
-			  unsigned long address)
+			  unsigned long address, spinlock_t **ptlp)
 {
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
+	spinlock_t *ptl;
 
-	/*
-	 * We need the page_table_lock to protect us from page faults,
-	 * munmap, fork, etc...
-	 */
-	spin_lock(&mm->page_table_lock);
 	pgd = pgd_offset(mm, address);
-	if (likely(pgd_present(*pgd))) {
-		pud = pud_offset(pgd, address);
-		if (likely(pud_present(*pud))) {
-			pmd = pmd_offset(pud, address);
-			if (likely(pmd_present(*pmd))) {
-				pte = pte_offset_map(pmd, address);
-				if (likely(pte_present(*pte) &&
-				    page_to_pfn(page) == pte_pfn(*pte)))
-					return pte;
-				pte_unmap(pte);
-			}
-		}
+	if (!pgd_present(*pgd))
+		return NULL;
+
+	pud = pud_offset(pgd, address);
+	if (!pud_present(*pud))
+		return NULL;
+
+	pmd = pmd_offset(pud, address);
+	if (!pmd_present(*pmd))
+		return NULL;
+
+	pte = pte_offset_map(pmd, address);
+	/* Make a quick check before getting the lock */
+	if (!pte_present(*pte)) {
+		pte_unmap(pte);
+		return NULL;
 	}
-	spin_unlock(&mm->page_table_lock);
-	return ERR_PTR(-ENOENT);
+
+	ptl = &mm->page_table_lock;
+	spin_lock(ptl);
+	if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) {
+		*ptlp = ptl;
+		return pte;
+	}
+	pte_unmap_unlock(pte, ptl);
+	return NULL;
 }
 
 /*
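Note that the unlocked pte_present() test above is purely an optimization; the decisive pfn comparison is repeated once the lock is held. For reference, the pte_offset_map_lock/pte_unmap_unlock pair whose style page_check_address now follows looks roughly like this at this point in the series (a sketch of the include/linux/mm.h helpers from memory, not quoted from the tree; the lock here is still the single mm->page_table_lock, not yet split per page table):

/* Approximate shape of the helpers this function now mirrors */
#define pte_offset_map_lock(mm, pmd, address, ptlp)	\
({							\
	spinlock_t *__ptl = &(mm)->page_table_lock;	\
	pte_t *__pte = pte_offset_map(pmd, address);	\
	*(ptlp) = __ptl;				\
	spin_lock(__ptl);				\
	__pte;						\
})

#define pte_unmap_unlock(pte, ptl)	do {		\
	spin_unlock(ptl);				\
	pte_unmap(pte);					\
} while (0)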
@@ -287,28 +294,28 @@ static int page_referenced_one(struct page *page,
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long address;
 	pte_t *pte;
+	spinlock_t *ptl;
 	int referenced = 0;
 
 	address = vma_address(page, vma);
 	if (address == -EFAULT)
 		goto out;
 
-	pte = page_check_address(page, mm, address);
-	if (!IS_ERR(pte)) {
-		if (ptep_clear_flush_young(vma, address, pte))
-			referenced++;
+	pte = page_check_address(page, mm, address, &ptl);
+	if (!pte)
+		goto out;
 
-		/* Pretend the page is referenced if the task has the
-		   swap token and is in the middle of a page fault. */
-		if (mm != current->mm && !ignore_token &&
-				has_swap_token(mm) &&
-				rwsem_is_locked(&mm->mmap_sem))
-			referenced++;
+	if (ptep_clear_flush_young(vma, address, pte))
+		referenced++;
 
-		(*mapcount)--;
-		pte_unmap(pte);
-		spin_unlock(&mm->page_table_lock);
-	}
+	/* Pretend the page is referenced if the task has the
+	   swap token and is in the middle of a page fault. */
+	if (mm != current->mm && !ignore_token && has_swap_token(mm) &&
+			rwsem_is_locked(&mm->mmap_sem))
+		referenced++;
+
+	(*mapcount)--;
+	pte_unmap_unlock(pte, ptl);
 out:
 	return referenced;
 }
@@ -507,14 +514,15 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
 	unsigned long address;
 	pte_t *pte;
 	pte_t pteval;
+	spinlock_t *ptl;
 	int ret = SWAP_AGAIN;
 
 	address = vma_address(page, vma);
 	if (address == -EFAULT)
 		goto out;
 
-	pte = page_check_address(page, mm, address);
-	if (IS_ERR(pte))
+	pte = page_check_address(page, mm, address, &ptl);
+	if (!pte)
 		goto out;
 
 	/*
@@ -564,8 +572,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
 	page_cache_release(page);
 
 out_unmap:
-	pte_unmap(pte);
-	spin_unlock(&mm->page_table_lock);
+	pte_unmap_unlock(pte, ptl);
 out:
 	return ret;
 }
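Condensing the two try_to_unmap_one hunks, the function's locking now has the following shape (a sketch with the unmapping work elided, not the full function):

static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long address;
	spinlock_t *ptl;
	pte_t *pte;
	int ret = SWAP_AGAIN;

	address = vma_address(page, vma);
	if (address == -EFAULT)
		goto out;

	pte = page_check_address(page, mm, address, &ptl);
	if (!pte)
		goto out;		/* no lock was taken on failure */

	if (0 /* e.g. page mlocked or recently referenced */) {
		ret = SWAP_FAIL;
		goto out_unmap;
	}

	/* ... flush, clear the pte, update the rss counters ... */

out_unmap:
	pte_unmap_unlock(pte, ptl);	/* the single point dropping the lock */
out:
	return ret;
}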
@@ -599,19 +606,14 @@ static void try_to_unmap_cluster(unsigned long cursor,
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
-	pte_t *pte, *original_pte;
+	pte_t *pte;
 	pte_t pteval;
+	spinlock_t *ptl;
 	struct page *page;
 	unsigned long address;
 	unsigned long end;
 	unsigned long pfn;
 
-	/*
-	 * We need the page_table_lock to protect us from page faults,
-	 * munmap, fork, etc...
-	 */
-	spin_lock(&mm->page_table_lock);
-
 	address = (vma->vm_start + cursor) & CLUSTER_MASK;
 	end = address + CLUSTER_SIZE;
 	if (address < vma->vm_start)
@@ -621,22 +623,22 @@ static void try_to_unmap_cluster(unsigned long cursor,
 
 	pgd = pgd_offset(mm, address);
 	if (!pgd_present(*pgd))
-		goto out_unlock;
+		return;
 
 	pud = pud_offset(pgd, address);
 	if (!pud_present(*pud))
-		goto out_unlock;
+		return;
 
 	pmd = pmd_offset(pud, address);
 	if (!pmd_present(*pmd))
-		goto out_unlock;
+		return;
+
+	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
 
 	/* Update high watermark before we lower rss */
 	update_hiwater_rss(mm);
 
-	for (original_pte = pte = pte_offset_map(pmd, address);
-			address < end; pte++, address += PAGE_SIZE) {
-
+	for (; address < end; pte++, address += PAGE_SIZE) {
 		if (!pte_present(*pte))
 			continue;
 
@@ -669,10 +671,7 @@ static void try_to_unmap_cluster(unsigned long cursor,
 		dec_mm_counter(mm, file_rss);
 		(*mapcount)--;
 	}
-
-	pte_unmap(original_pte);
-out_unlock:
-	spin_unlock(&mm->page_table_lock);
+	pte_unmap_unlock(pte - 1, ptl);
 }
 
 static int try_to_unmap_anon(struct page *page)
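One detail in the last hunk deserves a note: after the loop, pte points one entry past the last slot examined, so pte_unmap_unlock(pte - 1, ptl) hands pte_unmap() an address that still lies inside the kmapped page table, replacing the old original_pte bookkeeping. The pattern in isolation:

/*
 * Pattern used by try_to_unmap_cluster() after this patch: one
 * pte_offset_map_lock() before the loop, one pte_unmap_unlock()
 * after it.  "pte - 1" because the loop's update expression leaves
 * pte one slot past the last entry, and pte_unmap() must be given
 * an address within the mapped page table.
 */
pte = pte_offset_map_lock(mm, pmd, address, &ptl);
for (; address < end; pte++, address += PAGE_SIZE) {
	if (!pte_present(*pte))
		continue;
	/* ... unmap this entry ... */
}
pte_unmap_unlock(pte - 1, ptl);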