diff options
Diffstat (limited to 'mm/rmap.c')
-rw-r--r-- | mm/rmap.c | 146 |
1 files changed, 74 insertions, 72 deletions
@@ -32,7 +32,7 @@ | |||
32 | * page->flags PG_locked (lock_page) | 32 | * page->flags PG_locked (lock_page) |
33 | * mapping->i_mmap_lock | 33 | * mapping->i_mmap_lock |
34 | * anon_vma->lock | 34 | * anon_vma->lock |
35 | * mm->page_table_lock | 35 | * mm->page_table_lock or pte_lock |
36 | * zone->lru_lock (in mark_page_accessed) | 36 | * zone->lru_lock (in mark_page_accessed) |
37 | * swap_lock (in swap_duplicate, swap_info_get) | 37 | * swap_lock (in swap_duplicate, swap_info_get) |
38 | * mmlist_lock (in mmput, drain_mmlist and others) | 38 | * mmlist_lock (in mmput, drain_mmlist and others) |
@@ -244,37 +244,44 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) | |||
244 | /* | 244 | /* |
245 | * Check that @page is mapped at @address into @mm. | 245 | * Check that @page is mapped at @address into @mm. |
246 | * | 246 | * |
247 | * On success returns with mapped pte and locked mm->page_table_lock. | 247 | * On success returns with pte mapped and locked. |
248 | */ | 248 | */ |
249 | pte_t *page_check_address(struct page *page, struct mm_struct *mm, | 249 | pte_t *page_check_address(struct page *page, struct mm_struct *mm, |
250 | unsigned long address) | 250 | unsigned long address, spinlock_t **ptlp) |
251 | { | 251 | { |
252 | pgd_t *pgd; | 252 | pgd_t *pgd; |
253 | pud_t *pud; | 253 | pud_t *pud; |
254 | pmd_t *pmd; | 254 | pmd_t *pmd; |
255 | pte_t *pte; | 255 | pte_t *pte; |
256 | spinlock_t *ptl; | ||
256 | 257 | ||
257 | /* | ||
258 | * We need the page_table_lock to protect us from page faults, | ||
259 | * munmap, fork, etc... | ||
260 | */ | ||
261 | spin_lock(&mm->page_table_lock); | ||
262 | pgd = pgd_offset(mm, address); | 258 | pgd = pgd_offset(mm, address); |
263 | if (likely(pgd_present(*pgd))) { | 259 | if (!pgd_present(*pgd)) |
264 | pud = pud_offset(pgd, address); | 260 | return NULL; |
265 | if (likely(pud_present(*pud))) { | 261 | |
266 | pmd = pmd_offset(pud, address); | 262 | pud = pud_offset(pgd, address); |
267 | if (likely(pmd_present(*pmd))) { | 263 | if (!pud_present(*pud)) |
268 | pte = pte_offset_map(pmd, address); | 264 | return NULL; |
269 | if (likely(pte_present(*pte) && | 265 | |
270 | page_to_pfn(page) == pte_pfn(*pte))) | 266 | pmd = pmd_offset(pud, address); |
271 | return pte; | 267 | if (!pmd_present(*pmd)) |
272 | pte_unmap(pte); | 268 | return NULL; |
273 | } | 269 | |
274 | } | 270 | pte = pte_offset_map(pmd, address); |
271 | /* Make a quick check before getting the lock */ | ||
272 | if (!pte_present(*pte)) { | ||
273 | pte_unmap(pte); | ||
274 | return NULL; | ||
275 | } | ||
276 | |||
277 | ptl = pte_lockptr(mm, pmd); | ||
278 | spin_lock(ptl); | ||
279 | if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) { | ||
280 | *ptlp = ptl; | ||
281 | return pte; | ||
275 | } | 282 | } |
276 | spin_unlock(&mm->page_table_lock); | 283 | pte_unmap_unlock(pte, ptl); |
277 | return ERR_PTR(-ENOENT); | 284 | return NULL; |
278 | } | 285 | } |
279 | 286 | ||
280 | /* | 287 | /* |
@@ -287,24 +294,28 @@ static int page_referenced_one(struct page *page, | |||
287 | struct mm_struct *mm = vma->vm_mm; | 294 | struct mm_struct *mm = vma->vm_mm; |
288 | unsigned long address; | 295 | unsigned long address; |
289 | pte_t *pte; | 296 | pte_t *pte; |
297 | spinlock_t *ptl; | ||
290 | int referenced = 0; | 298 | int referenced = 0; |
291 | 299 | ||
292 | address = vma_address(page, vma); | 300 | address = vma_address(page, vma); |
293 | if (address == -EFAULT) | 301 | if (address == -EFAULT) |
294 | goto out; | 302 | goto out; |
295 | 303 | ||
296 | pte = page_check_address(page, mm, address); | 304 | pte = page_check_address(page, mm, address, &ptl); |
297 | if (!IS_ERR(pte)) { | 305 | if (!pte) |
298 | if (ptep_clear_flush_young(vma, address, pte)) | 306 | goto out; |
299 | referenced++; | ||
300 | 307 | ||
301 | if (mm != current->mm && !ignore_token && has_swap_token(mm)) | 308 | if (ptep_clear_flush_young(vma, address, pte)) |
302 | referenced++; | 309 | referenced++; |
303 | 310 | ||
304 | (*mapcount)--; | 311 | /* Pretend the page is referenced if the task has the |
305 | pte_unmap(pte); | 312 | swap token and is in the middle of a page fault. */ |
306 | spin_unlock(&mm->page_table_lock); | 313 | if (mm != current->mm && !ignore_token && has_swap_token(mm) && |
307 | } | 314 | rwsem_is_locked(&mm->mmap_sem)) |
315 | referenced++; | ||
316 | |||
317 | (*mapcount)--; | ||
318 | pte_unmap_unlock(pte, ptl); | ||
308 | out: | 319 | out: |
309 | return referenced; | 320 | return referenced; |
310 | } | 321 | } |
@@ -434,15 +445,11 @@ int page_referenced(struct page *page, int is_locked, int ignore_token) | |||
434 | * @vma: the vm area in which the mapping is added | 445 | * @vma: the vm area in which the mapping is added |
435 | * @address: the user virtual address mapped | 446 | * @address: the user virtual address mapped |
436 | * | 447 | * |
437 | * The caller needs to hold the mm->page_table_lock. | 448 | * The caller needs to hold the pte lock. |
438 | */ | 449 | */ |
439 | void page_add_anon_rmap(struct page *page, | 450 | void page_add_anon_rmap(struct page *page, |
440 | struct vm_area_struct *vma, unsigned long address) | 451 | struct vm_area_struct *vma, unsigned long address) |
441 | { | 452 | { |
442 | BUG_ON(PageReserved(page)); | ||
443 | |||
444 | inc_mm_counter(vma->vm_mm, anon_rss); | ||
445 | |||
446 | if (atomic_inc_and_test(&page->_mapcount)) { | 453 | if (atomic_inc_and_test(&page->_mapcount)) { |
447 | struct anon_vma *anon_vma = vma->anon_vma; | 454 | struct anon_vma *anon_vma = vma->anon_vma; |
448 | 455 | ||
@@ -461,13 +468,12 @@ void page_add_anon_rmap(struct page *page, | |||
461 | * page_add_file_rmap - add pte mapping to a file page | 468 | * page_add_file_rmap - add pte mapping to a file page |
462 | * @page: the page to add the mapping to | 469 | * @page: the page to add the mapping to |
463 | * | 470 | * |
464 | * The caller needs to hold the mm->page_table_lock. | 471 | * The caller needs to hold the pte lock. |
465 | */ | 472 | */ |
466 | void page_add_file_rmap(struct page *page) | 473 | void page_add_file_rmap(struct page *page) |
467 | { | 474 | { |
468 | BUG_ON(PageAnon(page)); | 475 | BUG_ON(PageAnon(page)); |
469 | if (!pfn_valid(page_to_pfn(page)) || PageReserved(page)) | 476 | BUG_ON(!pfn_valid(page_to_pfn(page))); |
470 | return; | ||
471 | 477 | ||
472 | if (atomic_inc_and_test(&page->_mapcount)) | 478 | if (atomic_inc_and_test(&page->_mapcount)) |
473 | inc_page_state(nr_mapped); | 479 | inc_page_state(nr_mapped); |
@@ -477,12 +483,10 @@ void page_add_file_rmap(struct page *page) | |||
477 | * page_remove_rmap - take down pte mapping from a page | 483 | * page_remove_rmap - take down pte mapping from a page |
478 | * @page: page to remove mapping from | 484 | * @page: page to remove mapping from |
479 | * | 485 | * |
480 | * Caller needs to hold the mm->page_table_lock. | 486 | * The caller needs to hold the pte lock. |
481 | */ | 487 | */ |
482 | void page_remove_rmap(struct page *page) | 488 | void page_remove_rmap(struct page *page) |
483 | { | 489 | { |
484 | BUG_ON(PageReserved(page)); | ||
485 | |||
486 | if (atomic_add_negative(-1, &page->_mapcount)) { | 490 | if (atomic_add_negative(-1, &page->_mapcount)) { |
487 | BUG_ON(page_mapcount(page) < 0); | 491 | BUG_ON(page_mapcount(page) < 0); |
488 | /* | 492 | /* |
@@ -510,14 +514,15 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma) | |||
510 | unsigned long address; | 514 | unsigned long address; |
511 | pte_t *pte; | 515 | pte_t *pte; |
512 | pte_t pteval; | 516 | pte_t pteval; |
517 | spinlock_t *ptl; | ||
513 | int ret = SWAP_AGAIN; | 518 | int ret = SWAP_AGAIN; |
514 | 519 | ||
515 | address = vma_address(page, vma); | 520 | address = vma_address(page, vma); |
516 | if (address == -EFAULT) | 521 | if (address == -EFAULT) |
517 | goto out; | 522 | goto out; |
518 | 523 | ||
519 | pte = page_check_address(page, mm, address); | 524 | pte = page_check_address(page, mm, address, &ptl); |
520 | if (IS_ERR(pte)) | 525 | if (!pte) |
521 | goto out; | 526 | goto out; |
522 | 527 | ||
523 | /* | 528 | /* |
@@ -541,8 +546,11 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma) | |||
541 | if (pte_dirty(pteval)) | 546 | if (pte_dirty(pteval)) |
542 | set_page_dirty(page); | 547 | set_page_dirty(page); |
543 | 548 | ||
549 | /* Update high watermark before we lower rss */ | ||
550 | update_hiwater_rss(mm); | ||
551 | |||
544 | if (PageAnon(page)) { | 552 | if (PageAnon(page)) { |
545 | swp_entry_t entry = { .val = page->private }; | 553 | swp_entry_t entry = { .val = page_private(page) }; |
546 | /* | 554 | /* |
547 | * Store the swap location in the pte. | 555 | * Store the swap location in the pte. |
548 | * See handle_pte_fault() ... | 556 | * See handle_pte_fault() ... |
@@ -551,21 +559,21 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma) | |||
551 | swap_duplicate(entry); | 559 | swap_duplicate(entry); |
552 | if (list_empty(&mm->mmlist)) { | 560 | if (list_empty(&mm->mmlist)) { |
553 | spin_lock(&mmlist_lock); | 561 | spin_lock(&mmlist_lock); |
554 | list_add(&mm->mmlist, &init_mm.mmlist); | 562 | if (list_empty(&mm->mmlist)) |
563 | list_add(&mm->mmlist, &init_mm.mmlist); | ||
555 | spin_unlock(&mmlist_lock); | 564 | spin_unlock(&mmlist_lock); |
556 | } | 565 | } |
557 | set_pte_at(mm, address, pte, swp_entry_to_pte(entry)); | 566 | set_pte_at(mm, address, pte, swp_entry_to_pte(entry)); |
558 | BUG_ON(pte_file(*pte)); | 567 | BUG_ON(pte_file(*pte)); |
559 | dec_mm_counter(mm, anon_rss); | 568 | dec_mm_counter(mm, anon_rss); |
560 | } | 569 | } else |
570 | dec_mm_counter(mm, file_rss); | ||
561 | 571 | ||
562 | dec_mm_counter(mm, rss); | ||
563 | page_remove_rmap(page); | 572 | page_remove_rmap(page); |
564 | page_cache_release(page); | 573 | page_cache_release(page); |
565 | 574 | ||
566 | out_unmap: | 575 | out_unmap: |
567 | pte_unmap(pte); | 576 | pte_unmap_unlock(pte, ptl); |
568 | spin_unlock(&mm->page_table_lock); | ||
569 | out: | 577 | out: |
570 | return ret; | 578 | return ret; |
571 | } | 579 | } |
@@ -599,19 +607,14 @@ static void try_to_unmap_cluster(unsigned long cursor, | |||
599 | pgd_t *pgd; | 607 | pgd_t *pgd; |
600 | pud_t *pud; | 608 | pud_t *pud; |
601 | pmd_t *pmd; | 609 | pmd_t *pmd; |
602 | pte_t *pte, *original_pte; | 610 | pte_t *pte; |
603 | pte_t pteval; | 611 | pte_t pteval; |
612 | spinlock_t *ptl; | ||
604 | struct page *page; | 613 | struct page *page; |
605 | unsigned long address; | 614 | unsigned long address; |
606 | unsigned long end; | 615 | unsigned long end; |
607 | unsigned long pfn; | 616 | unsigned long pfn; |
608 | 617 | ||
609 | /* | ||
610 | * We need the page_table_lock to protect us from page faults, | ||
611 | * munmap, fork, etc... | ||
612 | */ | ||
613 | spin_lock(&mm->page_table_lock); | ||
614 | |||
615 | address = (vma->vm_start + cursor) & CLUSTER_MASK; | 618 | address = (vma->vm_start + cursor) & CLUSTER_MASK; |
616 | end = address + CLUSTER_SIZE; | 619 | end = address + CLUSTER_SIZE; |
617 | if (address < vma->vm_start) | 620 | if (address < vma->vm_start) |
@@ -621,30 +624,33 @@ static void try_to_unmap_cluster(unsigned long cursor, | |||
621 | 624 | ||
622 | pgd = pgd_offset(mm, address); | 625 | pgd = pgd_offset(mm, address); |
623 | if (!pgd_present(*pgd)) | 626 | if (!pgd_present(*pgd)) |
624 | goto out_unlock; | 627 | return; |
625 | 628 | ||
626 | pud = pud_offset(pgd, address); | 629 | pud = pud_offset(pgd, address); |
627 | if (!pud_present(*pud)) | 630 | if (!pud_present(*pud)) |
628 | goto out_unlock; | 631 | return; |
629 | 632 | ||
630 | pmd = pmd_offset(pud, address); | 633 | pmd = pmd_offset(pud, address); |
631 | if (!pmd_present(*pmd)) | 634 | if (!pmd_present(*pmd)) |
632 | goto out_unlock; | 635 | return; |
636 | |||
637 | pte = pte_offset_map_lock(mm, pmd, address, &ptl); | ||
633 | 638 | ||
634 | for (original_pte = pte = pte_offset_map(pmd, address); | 639 | /* Update high watermark before we lower rss */ |
635 | address < end; pte++, address += PAGE_SIZE) { | 640 | update_hiwater_rss(mm); |
636 | 641 | ||
642 | for (; address < end; pte++, address += PAGE_SIZE) { | ||
637 | if (!pte_present(*pte)) | 643 | if (!pte_present(*pte)) |
638 | continue; | 644 | continue; |
639 | 645 | ||
640 | pfn = pte_pfn(*pte); | 646 | pfn = pte_pfn(*pte); |
641 | if (!pfn_valid(pfn)) | 647 | if (unlikely(!pfn_valid(pfn))) { |
648 | print_bad_pte(vma, *pte, address); | ||
642 | continue; | 649 | continue; |
650 | } | ||
643 | 651 | ||
644 | page = pfn_to_page(pfn); | 652 | page = pfn_to_page(pfn); |
645 | BUG_ON(PageAnon(page)); | 653 | BUG_ON(PageAnon(page)); |
646 | if (PageReserved(page)) | ||
647 | continue; | ||
648 | 654 | ||
649 | if (ptep_clear_flush_young(vma, address, pte)) | 655 | if (ptep_clear_flush_young(vma, address, pte)) |
650 | continue; | 656 | continue; |
@@ -663,13 +669,10 @@ static void try_to_unmap_cluster(unsigned long cursor, | |||
663 | 669 | ||
664 | page_remove_rmap(page); | 670 | page_remove_rmap(page); |
665 | page_cache_release(page); | 671 | page_cache_release(page); |
666 | dec_mm_counter(mm, rss); | 672 | dec_mm_counter(mm, file_rss); |
667 | (*mapcount)--; | 673 | (*mapcount)--; |
668 | } | 674 | } |
669 | 675 | pte_unmap_unlock(pte - 1, ptl); | |
670 | pte_unmap(original_pte); | ||
671 | out_unlock: | ||
672 | spin_unlock(&mm->page_table_lock); | ||
673 | } | 676 | } |
674 | 677 | ||
675 | static int try_to_unmap_anon(struct page *page) | 678 | static int try_to_unmap_anon(struct page *page) |
@@ -806,7 +809,6 @@ int try_to_unmap(struct page *page) | |||
806 | { | 809 | { |
807 | int ret; | 810 | int ret; |
808 | 811 | ||
809 | BUG_ON(PageReserved(page)); | ||
810 | BUG_ON(!PageLocked(page)); | 812 | BUG_ON(!PageLocked(page)); |
811 | 813 | ||
812 | if (PageAnon(page)) | 814 | if (PageAnon(page)) |