Diffstat (limited to 'mm/rmap.c')
-rw-r--r-- | mm/rmap.c | 134
1 file changed, 77 insertions, 57 deletions
@@ -20,20 +20,20 @@
 /*
  * Lock ordering in mm:
  *
- * inode->i_sem (while writing or truncating, not reading or faulting)
+ * inode->i_mutex (while writing or truncating, not reading or faulting)
  *   inode->i_alloc_sem
  *
  * When a page fault occurs in writing from user to file, down_read
- * of mmap_sem nests within i_sem; in sys_msync, i_sem nests within
- * down_read of mmap_sem; i_sem and down_write of mmap_sem are never
- * taken together; in truncation, i_sem is taken outermost.
+ * of mmap_sem nests within i_mutex; in sys_msync, i_mutex nests within
+ * down_read of mmap_sem; i_mutex and down_write of mmap_sem are never
+ * taken together; in truncation, i_mutex is taken outermost.
  *
  * mm->mmap_sem
  *   page->flags PG_locked (lock_page)
  *     mapping->i_mmap_lock
  *       anon_vma->lock
  *         mm->page_table_lock or pte_lock
- *           zone->lru_lock (in mark_page_accessed)
+ *           zone->lru_lock (in mark_page_accessed, isolate_lru_page)
  *             swap_lock (in swap_duplicate, swap_info_get)
  *               mmlist_lock (in mmput, drain_mmlist and others)
  *               mapping->private_lock (in __set_page_dirty_buffers)
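The hierarchy above reads top to bottom: a lock may only be acquired while holding locks listed above it. A minimal sketch of that nesting, purely illustrative and not a code path from this patch (the function and its arguments are invented for the illustration):

/*
 * Illustrative nesting only -- not a real path added by this diff.
 * A writer takes i_mutex outermost; a fault taken while copying from
 * the user buffer nests mmap_sem inside it, and the rmap/pte locks
 * nest further in, matching the comment above.
 */
static void lock_ordering_sketch(struct inode *inode, struct mm_struct *mm,
                                 struct address_space *mapping, struct page *page)
{
        mutex_lock(&inode->i_mutex);            /* write/truncate: outermost */
        down_read(&mm->mmap_sem);               /* fault during copy from user */
        lock_page(page);                        /* page->flags PG_locked */
        spin_lock(&mapping->i_mmap_lock);       /* file rmap lookup */
        spin_lock(&mm->page_table_lock);        /* or the split pte lock */

        /* ... pte manipulation would happen here ... */

        spin_unlock(&mm->page_table_lock);
        spin_unlock(&mapping->i_mmap_lock);
        unlock_page(page);
        up_read(&mm->mmap_sem);
        mutex_unlock(&inode->i_mutex);
}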
@@ -225,7 +225,7 @@ vma_address(struct page *page, struct vm_area_struct *vma)

 /*
  * At what user virtual address is page expected in vma? checking that the
- * page matches the vma: currently only used by unuse_process, on anon pages.
+ * page matches the vma: currently only used on anon pages, by unuse_vma;
  */
 unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
 {
@@ -234,7 +234,8 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
                    (void *)page->mapping - PAGE_MAPPING_ANON)
                        return -EFAULT;
        } else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) {
-               if (vma->vm_file->f_mapping != page->mapping)
+               if (!vma->vm_file ||
+                   vma->vm_file->f_mapping != page->mapping)
                        return -EFAULT;
        } else
                return -EFAULT;
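For context, the "expected" user virtual address that page_address_in_vma() finally returns comes from the linear-mapping arithmetic of vma_address(), defined just above this hunk. A sketch of that calculation, reconstructed for illustration rather than taken from this diff:

/*
 * Sketch of the linear-address calculation that page_address_in_vma()
 * relies on (see vma_address() above this hunk).
 */
static unsigned long vma_address_sketch(struct page *page,
                                        struct vm_area_struct *vma)
{
        pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
        unsigned long address;

        address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
        if (address < vma->vm_start || address >= vma->vm_end)
                return -EFAULT;         /* page does not fall inside this vma */
        return address;
}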
@@ -289,7 +290,7 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm,
  * repeatedly from either page_referenced_anon or page_referenced_file.
  */
 static int page_referenced_one(struct page *page,
-       struct vm_area_struct *vma, unsigned int *mapcount, int ignore_token)
+       struct vm_area_struct *vma, unsigned int *mapcount)
 {
        struct mm_struct *mm = vma->vm_mm;
        unsigned long address;
@@ -310,7 +311,7 @@ static int page_referenced_one(struct page *page,

        /* Pretend the page is referenced if the task has the
           swap token and is in the middle of a page fault. */
-       if (mm != current->mm && !ignore_token && has_swap_token(mm) &&
+       if (mm != current->mm && has_swap_token(mm) &&
                        rwsem_is_locked(&mm->mmap_sem))
                referenced++;

@@ -320,7 +321,7 @@ out:
        return referenced;
 }

-static int page_referenced_anon(struct page *page, int ignore_token)
+static int page_referenced_anon(struct page *page)
 {
        unsigned int mapcount;
        struct anon_vma *anon_vma;
@@ -333,8 +334,7 @@ static int page_referenced_anon(struct page *page, int ignore_token)

        mapcount = page_mapcount(page);
        list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
-               referenced += page_referenced_one(page, vma, &mapcount,
-                                                       ignore_token);
+               referenced += page_referenced_one(page, vma, &mapcount);
                if (!mapcount)
                        break;
        }
@@ -353,7 +353,7 @@ static int page_referenced_anon(struct page *page, int ignore_token)
  *
  * This function is only called from page_referenced for object-based pages.
  */
-static int page_referenced_file(struct page *page, int ignore_token)
+static int page_referenced_file(struct page *page)
 {
        unsigned int mapcount;
        struct address_space *mapping = page->mapping;
@@ -391,8 +391,7 @@ static int page_referenced_file(struct page *page, int ignore_token)
                        referenced++;
                        break;
                }
-               referenced += page_referenced_one(page, vma, &mapcount,
-                                                       ignore_token);
+               referenced += page_referenced_one(page, vma, &mapcount);
                if (!mapcount)
                        break;
        }
@@ -409,13 +408,10 @@ static int page_referenced_file(struct page *page, int ignore_token)
  * Quick test_and_clear_referenced for all mappings to a page,
  * returns the number of ptes which referenced the page.
  */
-int page_referenced(struct page *page, int is_locked, int ignore_token)
+int page_referenced(struct page *page, int is_locked)
 {
        int referenced = 0;

-       if (!swap_token_default_timeout)
-               ignore_token = 1;
-
        if (page_test_and_clear_young(page))
                referenced++;

@@ -424,15 +420,14 @@ int page_referenced(struct page *page, int is_locked, int ignore_token)

        if (page_mapped(page) && page->mapping) {
                if (PageAnon(page))
-                       referenced += page_referenced_anon(page, ignore_token);
+                       referenced += page_referenced_anon(page);
                else if (is_locked)
-                       referenced += page_referenced_file(page, ignore_token);
+                       referenced += page_referenced_file(page);
                else if (TestSetPageLocked(page))
                        referenced++;
                else {
                        if (page->mapping)
-                               referenced += page_referenced_file(page,
-                                                               ignore_token);
+                               referenced += page_referenced_file(page);
                        unlock_page(page);
                }
        }
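With ignore_token gone, the swap-token heuristic is handled entirely inside page_referenced_one(), so callers only report whether they already hold the page lock. A hypothetical caller-side view (the wrapper below is invented for illustration and is not part of this diff):

/*
 * Hypothetical caller after this change (illustration only): reclaim
 * code no longer threads an ignore_token flag through.
 */
static inline int sketch_page_was_referenced(struct page *page, int page_is_locked)
{
        return page_referenced(page, page_is_locked) != 0;
}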
@@ -440,6 +435,30 @@ int page_referenced(struct page *page, int is_locked, int ignore_token)
 }

 /**
+ * page_set_anon_rmap - setup new anonymous rmap
+ * @page: the page to add the mapping to
+ * @vma: the vm area in which the mapping is added
+ * @address: the user virtual address mapped
+ */
+static void __page_set_anon_rmap(struct page *page,
+       struct vm_area_struct *vma, unsigned long address)
+{
+       struct anon_vma *anon_vma = vma->anon_vma;
+
+       BUG_ON(!anon_vma);
+       anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
+       page->mapping = (struct address_space *) anon_vma;
+
+       page->index = linear_page_index(vma, address);
+
+       /*
+        * nr_mapped state can be updated without turning off
+        * interrupts because it is not modified via interrupt.
+        */
+       __inc_page_state(nr_mapped);
+}
+
+/**
  * page_add_anon_rmap - add pte mapping to an anonymous page
  * @page: the page to add the mapping to
  * @vma: the vm area in which the mapping is added
@@ -450,20 +469,27 @@ int page_referenced(struct page *page, int is_locked, int ignore_token)
 void page_add_anon_rmap(struct page *page,
        struct vm_area_struct *vma, unsigned long address)
 {
-       if (atomic_inc_and_test(&page->_mapcount)) {
-               struct anon_vma *anon_vma = vma->anon_vma;
-
-               BUG_ON(!anon_vma);
-               anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
-               page->mapping = (struct address_space *) anon_vma;
-
-               page->index = linear_page_index(vma, address);
-
-               inc_page_state(nr_mapped);
-       }
+       if (atomic_inc_and_test(&page->_mapcount))
+               __page_set_anon_rmap(page, vma, address);
        /* else checking page index and mapping is racy */
 }

+/*
+ * page_add_new_anon_rmap - add pte mapping to a new anonymous page
+ * @page: the page to add the mapping to
+ * @vma: the vm area in which the mapping is added
+ * @address: the user virtual address mapped
+ *
+ * Same as page_add_anon_rmap but must only be called on *new* pages.
+ * This means the inc-and-test can be bypassed.
+ */
+void page_add_new_anon_rmap(struct page *page,
+       struct vm_area_struct *vma, unsigned long address)
+{
+       atomic_set(&page->_mapcount, 0); /* elevate count by 1 (starts at -1) */
+       __page_set_anon_rmap(page, vma, address);
+}
+
 /**
  * page_add_file_rmap - add pte mapping to a file page
  * @page: the page to add the mapping to
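The new helper is for pages that cannot yet be mapped by any other pte, so the atomic inc-and-test in page_add_anon_rmap() is unnecessary. A hedged sketch of how an anonymous-fault path might use it; the surrounding helpers and the simplified control flow are assumptions for illustration, not code from this patch:

/*
 * Hedged sketch (not from this patch): a freshly allocated anonymous
 * page is mapped for the first time, so _mapcount can be set directly.
 * Locking and pte_same() rechecks are deliberately omitted.
 */
static int sketch_anon_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                             unsigned long address, pte_t *page_table)
{
        struct page *page = alloc_zeroed_user_highpage(vma, address);

        if (!page)
                return VM_FAULT_OOM;

        inc_mm_counter(mm, anon_rss);
        lru_cache_add_active(page);                     /* brand-new, not yet visible to rmap */
        page_add_new_anon_rmap(page, vma, address);     /* no inc-and-test needed */
        set_pte_at(mm, address, page_table,
                   pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))));
        return VM_FAULT_MINOR;
}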
@@ -476,7 +502,7 @@ void page_add_file_rmap(struct page *page)
        BUG_ON(!pfn_valid(page_to_pfn(page)));

        if (atomic_inc_and_test(&page->_mapcount))
-               inc_page_state(nr_mapped);
+               __inc_page_state(nr_mapped);
 }

 /**
@@ -488,6 +514,13 @@ void page_add_file_rmap(struct page *page)
 void page_remove_rmap(struct page *page)
 {
        if (atomic_add_negative(-1, &page->_mapcount)) {
+               if (page_mapcount(page) < 0) {
+                       printk (KERN_EMERG "Eeek! page_mapcount(page) went negative! (%d)\n", page_mapcount(page));
+                       printk (KERN_EMERG "  page->flags = %lx\n", page->flags);
+                       printk (KERN_EMERG "  page->count = %x\n", page_count(page));
+                       printk (KERN_EMERG "  page->mapping = %p\n", page->mapping);
+               }
+
                BUG_ON(page_mapcount(page) < 0);
                /*
                 * It would be tidy to reset the PageAnon mapping here,
@@ -500,7 +533,7 @@ void page_remove_rmap(struct page *page)
                 */
                if (page_test_and_clear_dirty(page))
                        set_page_dirty(page);
-               dec_page_state(nr_mapped);
+               __dec_page_state(nr_mapped);
        }
 }

@@ -529,10 +562,8 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
         * If the page is mlock()d, we cannot swap it out.
         * If it's recently referenced (perhaps page_referenced
         * skipped over this mm) then we should reactivate it.
-        *
-        * Pages belonging to VM_RESERVED regions should not happen here.
         */
-       if ((vma->vm_flags & (VM_LOCKED|VM_RESERVED)) ||
+       if ((vma->vm_flags & VM_LOCKED) ||
                        ptep_clear_flush_young(vma, address, pte)) {
                ret = SWAP_FAIL;
                goto out_unmap;
@@ -613,7 +644,6 @@ static void try_to_unmap_cluster(unsigned long cursor,
        struct page *page;
        unsigned long address;
        unsigned long end;
-       unsigned long pfn;

        address = (vma->vm_start + cursor) & CLUSTER_MASK;
        end = address + CLUSTER_SIZE;
@@ -642,21 +672,14 @@ static void try_to_unmap_cluster(unsigned long cursor,
        for (; address < end; pte++, address += PAGE_SIZE) {
                if (!pte_present(*pte))
                        continue;
-
-               pfn = pte_pfn(*pte);
-               if (unlikely(!pfn_valid(pfn))) {
-                       print_bad_pte(vma, *pte, address);
-                       continue;
-               }
-
-               page = pfn_to_page(pfn);
-               BUG_ON(PageAnon(page));
+               page = vm_normal_page(vma, address, *pte);
+               BUG_ON(!page || PageAnon(page));

                if (ptep_clear_flush_young(vma, address, pte))
                        continue;

                /* Nuke the page table entry. */
-               flush_cache_page(vma, address, pfn);
+               flush_cache_page(vma, address, pte_pfn(*pte));
                pteval = ptep_clear_flush(vma, address, pte);

                /* If nonlinear, store the file page offset in the pte. */
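The cluster walk now uses vm_normal_page() rather than open-coded pfn_valid()/pfn_to_page() checks; that helper (added elsewhere in this kernel series, not in this file) returns NULL for mappings that have no normal struct page, which is why the print_bad_pte() path disappears. A hedged sketch of the general pattern, with the surrounding function invented for illustration (the real hunk can BUG_ON a NULL page because nonlinear file vmas should never contain special pages):

/* Illustration of the pattern this hunk adopts, not code from the patch. */
static void sketch_walk_pte(struct vm_area_struct *vma, unsigned long address,
                            pte_t *pte)
{
        struct page *page;

        if (!pte_present(*pte))
                return;
        page = vm_normal_page(vma, address, *pte);
        if (!page)
                return;         /* e.g. a raw PFN mapping: nothing for rmap to do */

        /* ... rmap work on 'page' would go here ... */
}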
@@ -727,7 +750,7 @@ static int try_to_unmap_file(struct page *page)

        list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
                                                shared.vm_set.list) {
-               if (vma->vm_flags & (VM_LOCKED|VM_RESERVED))
+               if (vma->vm_flags & VM_LOCKED)
                        continue;
                cursor = (unsigned long) vma->vm_private_data;
                if (cursor > max_nl_cursor)
@@ -761,7 +784,7 @@ static int try_to_unmap_file(struct page *page)
        do {
                list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
                                                shared.vm_set.list) {
-                       if (vma->vm_flags & (VM_LOCKED|VM_RESERVED))
+                       if (vma->vm_flags & VM_LOCKED)
                                continue;
                        cursor = (unsigned long) vma->vm_private_data;
                        while ( cursor < max_nl_cursor &&
@@ -783,11 +806,8 @@ static int try_to_unmap_file(struct page *page)
         * in locked vmas). Reset cursor on all unreserved nonlinear
         * vmas, now forgetting on which ones it had fallen behind.
         */
-       list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
-                                               shared.vm_set.list) {
-               if (!(vma->vm_flags & VM_RESERVED))
-                       vma->vm_private_data = NULL;
-       }
+       list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
+               vma->vm_private_data = NULL;
 out:
        spin_unlock(&mapping->i_mmap_lock);
        return ret;