Diffstat (limited to 'mm/rmap.c')
-rw-r--r--	mm/rmap.c	146
1 file changed, 74 insertions(+), 72 deletions(-)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -32,7 +32,7 @@
  *     page->flags PG_locked (lock_page)
  *       mapping->i_mmap_lock
  *         anon_vma->lock
- *           mm->page_table_lock
+ *           mm->page_table_lock or pte_lock
  *             zone->lru_lock (in mark_page_accessed)
  *             swap_lock (in swap_duplicate, swap_info_get)
  *               mmlist_lock (in mmput, drain_mmlist and others)
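The new "or pte_lock" alternative in this ordering refers to the split page table lock: with enough CPUs configured, each page-table page carries its own spinlock instead of sharing mm->page_table_lock. A rough sketch of the helpers this diff relies on, simplified from the include/linux/mm.h of this era (an approximation, not the exact kernel text):

	#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
	/* one spinlock per page-table page, stored in its struct page */
	#define pte_lockptr(mm, pmd)	({ (void)(mm); &pmd_page(*(pmd))->ptl; })
	#else
	/* few CPUs: fall back to the single per-mm lock */
	#define pte_lockptr(mm, pmd)	({ (void)(pmd); &(mm)->page_table_lock; })
	#endif

	#define pte_offset_map_lock(mm, pmd, address, ptlp)	\
	({							\
		spinlock_t *__ptl = pte_lockptr(mm, pmd);	\
		pte_t *__pte = pte_offset_map(pmd, address);	\
		*(ptlp) = __ptl;				\
		spin_lock(__ptl);				\
		__pte;						\
	})

	#define pte_unmap_unlock(pte, ptl)	do {		\
		spin_unlock(ptl);				\
		pte_unmap(pte);					\
	} while (0)

page_check_address() below opens with exactly this pte_lockptr/spin_lock sequence, while try_to_unmap_cluster() uses the combined pte_offset_map_lock().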
@@ -244,37 +244,44 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
 /*
  * Check that @page is mapped at @address into @mm.
  *
- * On success returns with mapped pte and locked mm->page_table_lock.
+ * On success returns with pte mapped and locked.
  */
 pte_t *page_check_address(struct page *page, struct mm_struct *mm,
-			  unsigned long address)
+			  unsigned long address, spinlock_t **ptlp)
 {
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
+	spinlock_t *ptl;
 
-	/*
-	 * We need the page_table_lock to protect us from page faults,
-	 * munmap, fork, etc...
-	 */
-	spin_lock(&mm->page_table_lock);
 	pgd = pgd_offset(mm, address);
-	if (likely(pgd_present(*pgd))) {
-		pud = pud_offset(pgd, address);
-		if (likely(pud_present(*pud))) {
-			pmd = pmd_offset(pud, address);
-			if (likely(pmd_present(*pmd))) {
-				pte = pte_offset_map(pmd, address);
-				if (likely(pte_present(*pte) &&
-					   page_to_pfn(page) == pte_pfn(*pte)))
-					return pte;
-				pte_unmap(pte);
-			}
-		}
-	}
+	if (!pgd_present(*pgd))
+		return NULL;
+
+	pud = pud_offset(pgd, address);
+	if (!pud_present(*pud))
+		return NULL;
+
+	pmd = pmd_offset(pud, address);
+	if (!pmd_present(*pmd))
+		return NULL;
+
+	pte = pte_offset_map(pmd, address);
+	/* Make a quick check before getting the lock */
+	if (!pte_present(*pte)) {
+		pte_unmap(pte);
+		return NULL;
+	}
+
+	ptl = pte_lockptr(mm, pmd);
+	spin_lock(ptl);
+	if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) {
+		*ptlp = ptl;
+		return pte;
 	}
-	spin_unlock(&mm->page_table_lock);
-	return ERR_PTR(-ENOENT);
+	pte_unmap_unlock(pte, ptl);
+	return NULL;
 }
 
 /*
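With this change, a successful page_check_address() returns the pte still kmapped and with its pte lock held, and *ptlp tells the caller which lock to drop, so every caller must now pair the call with pte_unmap_unlock(). A minimal caller sketch (hypothetical function, for illustration only):

	/* Hypothetical caller, showing the new calling convention. */
	static int page_is_mapped_at(struct page *page, struct mm_struct *mm,
				     unsigned long address)
	{
		spinlock_t *ptl;
		pte_t *pte;

		pte = page_check_address(page, mm, address, &ptl);
		if (!pte)
			return 0;
		/* *pte may be inspected or modified here, under the pte lock */
		pte_unmap_unlock(pte, ptl);	/* drops ptl, then kunmaps pte */
		return 1;
	}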
@@ -287,24 +294,28 @@ static int page_referenced_one(struct page *page,
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long address;
 	pte_t *pte;
+	spinlock_t *ptl;
 	int referenced = 0;
 
 	address = vma_address(page, vma);
 	if (address == -EFAULT)
 		goto out;
 
-	pte = page_check_address(page, mm, address);
-	if (!IS_ERR(pte)) {
-		if (ptep_clear_flush_young(vma, address, pte))
-			referenced++;
+	pte = page_check_address(page, mm, address, &ptl);
+	if (!pte)
+		goto out;
 
-		if (mm != current->mm && !ignore_token && has_swap_token(mm))
-			referenced++;
+	if (ptep_clear_flush_young(vma, address, pte))
+		referenced++;
 
-		(*mapcount)--;
-		pte_unmap(pte);
-		spin_unlock(&mm->page_table_lock);
-	}
+	/* Pretend the page is referenced if the task has the
+	   swap token and is in the middle of a page fault. */
+	if (mm != current->mm && !ignore_token && has_swap_token(mm) &&
+			rwsem_is_locked(&mm->mmap_sem))
+		referenced++;
+
+	(*mapcount)--;
+	pte_unmap_unlock(pte, ptl);
 out:
 	return referenced;
 }
@@ -434,15 +445,11 @@ int page_referenced(struct page *page, int is_locked, int ignore_token)
  * @vma:	the vm area in which the mapping is added
  * @address:	the user virtual address mapped
  *
- * The caller needs to hold the mm->page_table_lock.
+ * The caller needs to hold the pte lock.
  */
 void page_add_anon_rmap(struct page *page,
 	struct vm_area_struct *vma, unsigned long address)
 {
-	BUG_ON(PageReserved(page));
-
-	inc_mm_counter(vma->vm_mm, anon_rss);
-
 	if (atomic_inc_and_test(&page->_mapcount)) {
 		struct anon_vma *anon_vma = vma->anon_vma;
 
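As with page_add_file_rmap() and page_remove_rmap() below, the serialization the rmap helpers now rely on is the pte lock of the pte being installed or torn down. Note also that the anon_rss accounting moves out of page_add_anon_rmap() and becomes the caller's job. A hypothetical fragment showing the expected calling context (the real callers live in mm/memory.c):

	/* Hypothetical sketch of a fault path installing an anonymous page. */
	static void map_anon_page(struct mm_struct *mm, struct vm_area_struct *vma,
				  pmd_t *pmd, unsigned long address,
				  struct page *page)
	{
		spinlock_t *ptl;
		pte_t *pte;

		pte = pte_offset_map_lock(mm, pmd, address, &ptl);
		inc_mm_counter(mm, anon_rss);	/* accounting now done by caller */
		set_pte_at(mm, address, pte, mk_pte(page, vma->vm_page_prot));
		page_add_anon_rmap(page, vma, address);	/* pte lock held here */
		pte_unmap_unlock(pte, ptl);
	}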
@@ -461,13 +468,12 @@ void page_add_anon_rmap(struct page *page,
  * page_add_file_rmap - add pte mapping to a file page
  * @page: the page to add the mapping to
  *
- * The caller needs to hold the mm->page_table_lock.
+ * The caller needs to hold the pte lock.
  */
 void page_add_file_rmap(struct page *page)
 {
 	BUG_ON(PageAnon(page));
-	if (!pfn_valid(page_to_pfn(page)) || PageReserved(page))
-		return;
+	BUG_ON(!pfn_valid(page_to_pfn(page)));
 
 	if (atomic_inc_and_test(&page->_mapcount))
 		inc_page_state(nr_mapped);
@@ -477,12 +483,10 @@ void page_add_file_rmap(struct page *page)
  * page_remove_rmap - take down pte mapping from a page
  * @page: page to remove mapping from
  *
- * Caller needs to hold the mm->page_table_lock.
+ * The caller needs to hold the pte lock.
  */
 void page_remove_rmap(struct page *page)
 {
-	BUG_ON(PageReserved(page));
-
 	if (atomic_add_negative(-1, &page->_mapcount)) {
 		BUG_ON(page_mapcount(page) < 0);
 		/*
@@ -510,14 +514,15 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
 	unsigned long address;
 	pte_t *pte;
 	pte_t pteval;
+	spinlock_t *ptl;
 	int ret = SWAP_AGAIN;
 
 	address = vma_address(page, vma);
 	if (address == -EFAULT)
 		goto out;
 
-	pte = page_check_address(page, mm, address);
-	if (IS_ERR(pte))
+	pte = page_check_address(page, mm, address, &ptl);
+	if (!pte)
 		goto out;
 
 	/*
@@ -541,8 +546,11 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
 	if (pte_dirty(pteval))
 		set_page_dirty(page);
 
+	/* Update high watermark before we lower rss */
+	update_hiwater_rss(mm);
+
 	if (PageAnon(page)) {
-		swp_entry_t entry = { .val = page->private };
+		swp_entry_t entry = { .val = page_private(page) };
 		/*
 		 * Store the swap location in the pte.
 		 * See handle_pte_fault() ...
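The ordering comment matters: hiwater_rss records the peak resident set size, so it must be sampled before dec_mm_counter() lowers rss, or a peak reached just before the unmap would be lost. A sketch of what update_hiwater_rss() amounts to (an assumption for illustration; simplified, not the exact kernel definition):

	/* Record the rss high watermark before rss is decremented. */
	#define update_hiwater_rss(mm)	do {			\
		unsigned long _rss = get_mm_rss(mm);		\
		if ((mm)->hiwater_rss < _rss)			\
			(mm)->hiwater_rss = _rss;		\
	} while (0)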
@@ -551,21 +559,21 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
 		swap_duplicate(entry);
 		if (list_empty(&mm->mmlist)) {
 			spin_lock(&mmlist_lock);
-			list_add(&mm->mmlist, &init_mm.mmlist);
+			if (list_empty(&mm->mmlist))
+				list_add(&mm->mmlist, &init_mm.mmlist);
 			spin_unlock(&mmlist_lock);
 		}
 		set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
 		BUG_ON(pte_file(*pte));
 		dec_mm_counter(mm, anon_rss);
-	}
+	} else
+		dec_mm_counter(mm, file_rss);
 
-	dec_mm_counter(mm, rss);
 	page_remove_rmap(page);
 	page_cache_release(page);
 
 out_unmap:
-	pte_unmap(pte);
-	spin_unlock(&mm->page_table_lock);
+	pte_unmap_unlock(pte, ptl);
 out:
 	return ret;
 }
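The repeated list_empty() test added above is the usual check-lock-recheck idiom: with mm->page_table_lock no longer held across this path, the unlocked test is only a racy fast path and must be confirmed under mmlist_lock before the list_add(), since another thread may have linked the mm in between. The pattern in isolation:

	if (list_empty(&mm->mmlist)) {		/* racy fast path, no lock */
		spin_lock(&mmlist_lock);
		if (list_empty(&mm->mmlist))	/* authoritative recheck */
			list_add(&mm->mmlist, &init_mm.mmlist);
		spin_unlock(&mmlist_lock);
	}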
@@ -599,19 +607,14 @@ static void try_to_unmap_cluster(unsigned long cursor,
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
-	pte_t *pte, *original_pte;
+	pte_t *pte;
 	pte_t pteval;
+	spinlock_t *ptl;
 	struct page *page;
 	unsigned long address;
 	unsigned long end;
 	unsigned long pfn;
 
-	/*
-	 * We need the page_table_lock to protect us from page faults,
-	 * munmap, fork, etc...
-	 */
-	spin_lock(&mm->page_table_lock);
-
 	address = (vma->vm_start + cursor) & CLUSTER_MASK;
 	end = address + CLUSTER_SIZE;
 	if (address < vma->vm_start)
@@ -621,30 +624,33 @@ static void try_to_unmap_cluster(unsigned long cursor,
 
 	pgd = pgd_offset(mm, address);
 	if (!pgd_present(*pgd))
-		goto out_unlock;
+		return;
 
 	pud = pud_offset(pgd, address);
 	if (!pud_present(*pud))
-		goto out_unlock;
+		return;
 
 	pmd = pmd_offset(pud, address);
 	if (!pmd_present(*pmd))
-		goto out_unlock;
+		return;
+
+	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
 
-	for (original_pte = pte = pte_offset_map(pmd, address);
-			address < end; pte++, address += PAGE_SIZE) {
+	/* Update high watermark before we lower rss */
+	update_hiwater_rss(mm);
 
+	for (; address < end; pte++, address += PAGE_SIZE) {
 		if (!pte_present(*pte))
 			continue;
 
 		pfn = pte_pfn(*pte);
-		if (!pfn_valid(pfn))
+		if (unlikely(!pfn_valid(pfn))) {
+			print_bad_pte(vma, *pte, address);
 			continue;
+		}
 
 		page = pfn_to_page(pfn);
 		BUG_ON(PageAnon(page));
-		if (PageReserved(page))
-			continue;
 
 		if (ptep_clear_flush_young(vma, address, pte))
 			continue;
@@ -663,13 +669,10 @@ static void try_to_unmap_cluster(unsigned long cursor,
 
 		page_remove_rmap(page);
 		page_cache_release(page);
-		dec_mm_counter(mm, rss);
+		dec_mm_counter(mm, file_rss);
 		(*mapcount)--;
 	}
-
-	pte_unmap(original_pte);
-out_unlock:
-	spin_unlock(&mm->page_table_lock);
+	pte_unmap_unlock(pte - 1, ptl);
 }
 
 static int try_to_unmap_anon(struct page *page)
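One subtlety in the cluster walk: the for loop post-increments pte, so on exit pte points one past the last entry examined; pte - 1 is therefore passed to pte_unmap_unlock(), and any pte within the same page-table page serves equally well for the kunmap. The map-lock / iterate / unlock-unmap idiom in schematic form (a sketch, not a complete function):

	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
	for (; address < end; pte++, address += PAGE_SIZE) {
		/* ... examine or clear *pte under the pte lock ... */
	}
	pte_unmap_unlock(pte - 1, ptl);	/* pte has walked past the last entry */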
@@ -806,7 +809,6 @@ int try_to_unmap(struct page *page)
 {
 	int ret;
 
-	BUG_ON(PageReserved(page));
 	BUG_ON(!PageLocked(page));
 
 	if (PageAnon(page))